Skip to content

Commit 86c1251

Browse files
authored
Test examples using doctest (#10)
1 parent 044ce52 commit 86c1251

File tree

4 files changed

39 additions and 52 deletions

.github/workflows/CI.yml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,13 @@ jobs:
3131
- name: Test package
3232
run: |
3333
python -m pip install --upgrade pip
34-
pip install -r requirements-dev.txt
34+
pip install -r requirements-dev.txt zstandard
35+
python -c "import zstandard;zstandard.ZstdDecompressor().copy_stream(open('tests/data/system.dic.zst','rb'),open('tests/data/system.dic','wb'))"
3536
pip install vibrato --no-index --find-links target/wheels --force-reinstall
3637
mypy --strict tests
3738
pytest
39+
python -m doctest README.md
40+
python -m doctest docs/source/examples.rst
3841
3942
pack-sdist:
4043
needs: [ test ]

README.md

Lines changed: 27 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -40,43 +40,40 @@ To perform tokenization, follow [the document of Vibrato](https://github.com/daa
4040
Check the version number as shown below to use compatible models:
4141

4242
```python
43-
import vibrato
44-
vibrato.VIBRATO_VERSION
45-
#=> "0.5.0"
43+
>>> import vibrato
44+
>>> vibrato.VIBRATO_VERSION
45+
'0.5.0'
46+
4647
```
4748

4849
Examples:
4950

5051
```python
51-
import vibrato
52+
>>> import vibrato
53+
54+
>>> with open('tests/data/system.dic', 'rb') as fp:
55+
... tokenizer = vibrato.Vibrato(fp.read())
5256

53-
with open('path/to/system.dic', 'rb') as fp:
54-
dict_data = fp.read()
55-
tokenizer = vibrato.Vibrato(dict_data)
57+
>>> tokens = tokenizer.tokenize('社長は火星猫だ')
5658

57-
tokens = tokenizer.tokenize('社長は火星猫だ')
59+
>>> len(tokens)
60+
5
5861

59-
len(tokens)
60-
#=> 5
62+
>>> tokens[0]
63+
Token { surface: "社長", feature: "名詞,普通名詞,一般,*" }
6164

62-
list(tokens)
63-
#=> [Token { surface: "社長", feature: "名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,," },
64-
# Token { surface: "は", feature: "助詞,係助詞,*,*,*,*,は,ハ,ワ,," },
65-
# Token { surface: "火星", feature: "名詞,一般,*,*,*,*,火星,カセイ,カセイ,," },
66-
# Token { surface: "猫", feature: "名詞,一般,*,*,*,*,猫,ネコ,ネコ,," },
67-
# Token { surface: "だ", feature: "助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ,," }]
65+
>>> tokens[0].surface()
66+
'社長'
6867

69-
tokens[0].surface()
70-
#=> '社長'
68+
>>> tokens[0].feature()
69+
'名詞,普通名詞,一般,*'
7170

72-
tokens[0].feature()
73-
#=> '名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,,'
71+
>>> tokens[0].start()
72+
0
7473

75-
tokens[0].start()
76-
#=> 0
74+
>>> tokens[0].end()
75+
2
7776

78-
tokens[0].end()
79-
#=> 2
8077
```
8178

8279
## Note for distributed models
@@ -85,22 +82,14 @@ The distributed models are compressed in zstd format. If you want to load these
8582
you must decompress them outside the API.
8683

8784
```python
88-
import vibrato
89-
import zstandard # zstandard package in PyPI
90-
91-
dctx = zstandard.ZstdDecompressor()
92-
with open('path/to/system.dic.zst', 'rb') as fp:
93-
dict_reader = dctx.stream_reader(fp)
94-
tokenizer = vibrato.Vibrato(dict_reader.read())
95-
```
96-
97-
## Documentation
85+
>>> import vibrato
86+
>>> import zstandard # zstandard package in PyPI
9887

99-
Use the help function to show the API reference.
88+
>>> dctx = zstandard.ZstdDecompressor()
89+
>>> with open('tests/data/system.dic.zst', 'rb') as fp:
90+
... with dctx.stream_reader(fp) as dict_reader:
91+
... tokenizer = vibrato.Vibrato(dict_reader.read())
10092

101-
```python
102-
import vibrato
103-
help(vibrato)
10493
```
10594

10695
## License

docs/source/examples.rst

Lines changed: 8 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,27 +19,22 @@ Examples:
1919
2020
>>> import vibrato
2121
22-
>>> with open('path/to/system.dic', 'rb') as fp:
23-
... dict_data = fp.read()
24-
>>> tokenizer = vibrato.Vibrato(dict_data)
22+
>>> with open('tests/data/system.dic', 'rb') as fp:
23+
... tokenizer = vibrato.Vibrato(fp.read())
2524
2625
>>> tokens = tokenizer.tokenize('社長は火星猫だ')
2726
2827
>>> len(tokens)
2928
5
3029
31-
>>> list(tokens)
32-
[Token { surface: "社長", feature: "名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,," },
33-
Token { surface: "", feature: "助詞,係助詞,*,*,*,*,は,ハ,ワ,," },
34-
Token { surface: "火星", feature: "名詞,一般,*,*,*,*,火星,カセイ,カセイ,," },
35-
Token { surface: "", feature: "名詞,一般,*,*,*,*,猫,ネコ,ネコ,," },
36-
Token { surface: "", feature: "助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ,," }]
30+
>>> tokens[0]
31+
Token { surface: "社長", feature: "名詞,普通名詞,一般,*" }
3732
3833
>>> tokens[0].surface()
3934
'社長'
4035
4136
>>> tokens[0].feature()
42-
'名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,,'
37+
'名詞,普通名詞,一般,*'
4338
4439
>>> tokens[0].start()
4540
0
@@ -56,6 +51,6 @@ you must decompress them outside the API:
5651
>>> import zstandard # zstandard package in PyPI
5752
5853
>>> dctx = zstandard.ZstdDecompressor()
59-
>>> with open('path/to/system.dic.zst', 'rb') as fp:
60-
... dict_reader = dctx.stream_reader(fp)
61-
>>> tokenizer = vibrato.Vibrato(dict_reader.read())
54+
>>> with open('tests/data/system.dic.zst', 'rb') as fp:
55+
... with dctx.stream_reader(fp) as dict_reader:
56+
... tokenizer = vibrato.Vibrato(dict_reader.read())

tests/data/system.dic.zst

1.01 KB
Binary file not shown.

0 commit comments

Comments
 (0)