@@ -40,43 +40,40 @@ To perform tokenization, follow [the document of Vibrato](https://github.com/daa
4040Check the version number as shown below to use compatible models:
4141
4242``` python
43- import vibrato
44- vibrato.VIBRATO_VERSION
45- # => "0.5.0"
43+ >>> import vibrato
44+ >>> vibrato.VIBRATO_VERSION
45+ '0.5.0'
46+
4647```
4748
4849Examples:
4950
5051``` python
51- import vibrato
52+ >>> import vibrato
53+
54+ >>> with open('tests/data/system.dic', 'rb') as fp:
55+ ...     tokenizer = vibrato.Vibrato(fp.read())
5256
53- with open('path/to/system.dic', 'rb') as fp:
54- dict_data = fp.read()
55- tokenizer = vibrato.Vibrato(dict_data)
57+ >>> tokens = tokenizer.tokenize('社長は火星猫だ')
5658
57- tokens = tokenizer.tokenize(' 社長は火星猫だ' )
59+ >>> len(tokens)
60+ 5
5861
59- len(tokens)
60- # => 5
62+ >>> tokens[0]
63+ Token { surface: "社長", feature: "名詞,普通名詞,一般,*" }
6164
62- list(tokens)
63- # => [Token { surface: "社長", feature: "名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,," },
64- # Token { surface: "は", feature: "助詞,係助詞,*,*,*,*,は,ハ,ワ,," },
65- # Token { surface: "火星", feature: "名詞,一般,*,*,*,*,火星,カセイ,カセイ,," },
66- # Token { surface: "猫", feature: "名詞,一般,*,*,*,*,猫,ネコ,ネコ,," },
67- # Token { surface: "だ", feature: "助動詞,*,*,*,特殊・ダ,基本形,だ,ダ,ダ,," }]
65+ >>> tokens[0].surface()
66+ '社長'
6867
69- tokens[0].surface()
70- # => '社長'
68+ >>> tokens[0].feature()
69+ '名詞,普通名詞,一般,*'
7170
72- tokens[0].feature()
73- # => '名詞,一般,*,*,*,*,社長,シャチョウ,シャチョー,,'
71+ >>> tokens[0].start()
72+ 0
7473
75- tokens[0].start()
76- # => 0
74+ >>> tokens[0].end()
75+ 2
7776
78- tokens[0 ].end()
79- # => 2
8077```
8178
8279## Note for distributed models
@@ -85,22 +82,14 @@ The distributed models are compressed in zstd format. If you want to load these
8582you must decompress them outside the API.
8683
8784``` python
88- import vibrato
89- import zstandard # zstandard package in PyPI
90-
91- dctx = zstandard.ZstdDecompressor()
92- with open('path/to/system.dic.zst', 'rb') as fp:
93- dict_reader = dctx.stream_reader(fp)
94- tokenizer = vibrato.Vibrato(dict_reader.read())
95- ```
96-
97- ## Documentation
85+ >>> import vibrato
86+ >>> import zstandard  # zstandard package in PyPI
9887
99- Use the help function to show the API reference.
88+ >>> dctx = zstandard.ZstdDecompressor()
89+ >>> with open('tests/data/system.dic.zst', 'rb') as fp:
90+ ...     with dctx.stream_reader(fp) as dict_reader:
91+ ...         tokenizer = vibrato.Vibrato(dict_reader.read())
10092
101- ``` python
102- import vibrato
103- help(vibrato)
10493```
10594
10695## License
0 commit comments