Commit ad9a3c3 (parent d92b3cd): "some update"

10 files changed, 56 insertions(+), 7 deletions(-)

bindings/python/py_src/tokenizers/__init__.pyi (9 additions, 0 deletions)

@@ -125,6 +125,9 @@ class Encoding:
     """
     The :class:`~tokenizers.Encoding` represents the output of a :class:`~tokenizers.Tokenizer`.
     """
+    def __init__(self):
+        pass
+
     @property
     def attention_mask(self):
         """
@@ -627,6 +630,9 @@ class NormalizedString:
         sequence: str:
            The string sequence used to initialize this NormalizedString
     """
+    def __init__(self, sequence):
+        pass
+
     def append(self, s):
         """
         Append the given sequence to the string
@@ -887,6 +893,9 @@ class Regex:
         pass

 class Token:
+    def __init__(self, id, value, offsets):
+        pass
+
     @property
     def id(self):
         """ """

bindings/python/py_src/tokenizers/decoders/__init__.pyi (17 additions, 0 deletions)

@@ -7,6 +7,23 @@ class DecodeStream:
     def __init__(self, ids=None, skip_special_tokens=False):
         pass

+    def step(self, tokenizer, id):
+        """
+        Streaming decode step
+
+        Args:
+            tokenizer (:class:`~tokenizers.Tokenizer`):
+                The tokenizer to use for decoding
+            id (:obj:`int` or `List[int]`):
+                The next token id or list of token ids to add to the stream
+
+
+        Returns:
+            :obj:`Optional[str]`: The next decoded string chunk, or None if not enough
+            tokens have been provided yet.
+        """
+        pass
+
 class Decoder:
     """
     Base class for all decoders
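The new `step` docstring documents the streaming-decode loop. A short sketch of the intended usage; "gpt2" is only an example checkpoint, any tokenizer works:

    from tokenizers import Tokenizer
    from tokenizers.decoders import DecodeStream

    tokenizer = Tokenizer.from_pretrained("gpt2")
    stream = DecodeStream(skip_special_tokens=True)

    # step() buffers ids and returns None until a full chunk is decodable
    for token_id in tokenizer.encode("Hello world").ids:
        chunk = stream.step(tokenizer, token_id)
        if chunk is not None:
            print(chunk, end="")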

bindings/python/py_src/tokenizers/models/__init__.pyi (3 additions, 0 deletions)

@@ -8,6 +8,9 @@ class Model:

     This class cannot be constructed directly. Please use one of the concrete models.
     """
+    def __init__(self):
+        pass
+
     def get_trainer(self):
        """
        Get the associated :class:`~tokenizers.trainers.Trainer`
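Note the stub sits under a docstring saying `Model` cannot be constructed directly; per the comment in models.rs below, the default constructor exists mainly so an empty model can be instantiated for pickling. In practice you build a concrete model, roughly:

    from tokenizers.models import BPE

    model = BPE()                  # a concrete Model subclass
    trainer = model.get_trainer()  # its associated trainer (a BpeTrainer)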

bindings/python/py_src/tokenizers/pre_tokenizers/__init__.pyi (3 additions, 0 deletions)

@@ -197,6 +197,9 @@ class CharDelimiterSplit(PreTokenizer):
         delimiter: str:
             The delimiter char that will be used to split input
     """
+    def __init__(self, delimiter):
+        pass
+
     @property
     def delimiter(self):
         """ """

bindings/python/src/decoders.rs (13 additions, 0 deletions)

@@ -678,6 +678,19 @@ impl PyDecodeStream {
             prefix_index: 0,
         }
     }
+
+    /// Streaming decode step
+    ///
+    /// Args:
+    ///     tokenizer (:class:`~tokenizers.Tokenizer`):
+    ///         The tokenizer to use for decoding
+    ///     id (:obj:`int` or `List[int]`):
+    ///         The next token id or list of token ids to add to the stream
+    ///
+    ///
+    /// Returns:
+    ///     :obj:`Optional[str]`: The next decoded string chunk, or None if not enough
+    ///     tokens have been provided yet.
     #[pyo3(signature = (tokenizer, id), text_signature = "(self, tokenizer, id)")]
     fn step(&mut self, tokenizer: &PyTokenizer, id: StreamInput) -> PyResult<Option<String>> {
         let id: Vec<u32> = match id {

bindings/python/src/encoding.rs (1 addition, 1 deletion)

@@ -23,7 +23,7 @@ impl From<tk::tokenizer::Encoding> for PyEncoding {
 #[pymethods]
 impl PyEncoding {
     #[new]
-    #[pyo3(text_signature = None)]
+    #[pyo3(signature = (), text_signature = "(self)")]
     fn new() -> Self {
         Self {
             encoding: tk::tokenizer::Encoding::default(),
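`text_signature = None` suppresses the generated signature entirely; pairing an explicit `signature` with a `text_signature` string is what makes the constructor introspectable from Python. Roughly, with a build that includes this commit:

    import inspect
    from tokenizers import Encoding

    # pyo3 attaches text_signature on #[new] to the class itself
    print(Encoding.__text_signature__)  # expected: "(self)"
    print(inspect.signature(Encoding))  # expected: ()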

bindings/python/src/models.rs (1 addition, 1 deletion)

@@ -102,7 +102,7 @@ where
 #[pymethods]
 impl PyModel {
     #[new]
-    #[pyo3(text_signature = None)]
+    #[pyo3(signature = (), text_signature = "(self)")]
     fn __new__() -> Self {
         // Instantiate a default empty model. This doesn't really make sense, but we need
         // to be able to instantiate an empty model for pickle capabilities.

bindings/python/src/pre_tokenizers.rs (1 addition, 1 deletion)

@@ -507,7 +507,7 @@ impl PyCharDelimiterSplit {
     }

     #[new]
-    #[pyo3(text_signature = None)]
+    #[pyo3(signature = (delimiter), text_signature = "(self, delimiter)")]
     pub fn new(delimiter: char) -> PyResult<(Self, PyPreTokenizer)> {
         Ok((
             PyCharDelimiterSplit {},
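With the delimiter argument declared, `CharDelimiterSplit(delimiter)` is self-documenting. A quick sketch:

    from tokenizers.pre_tokenizers import CharDelimiterSplit

    pre = CharDelimiterSplit("-")
    print(pre.pre_tokenize_str("pre-tokenize-me"))
    # expected: [('pre', (0, 3)), ('tokenize', (4, 12)), ('me', (13, 15))]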

bindings/python/src/token.rs (5 additions, 1 deletion)

@@ -19,8 +19,12 @@ impl From<PyToken> for Token {

 #[pymethods]
 impl PyToken {
+    /// Create a token from id, string value and byte offsets
     #[new]
-    #[pyo3(text_signature = None)]
+    #[pyo3(
+        signature = (id, value, offsets),
+        text_signature = "(self, id, value, offsets)"
+    )]
     fn new(id: u32, value: String, offsets: (usize, usize)) -> PyToken {
         Token::new(id, value, offsets).into()
     }

bindings/python/src/utils/normalization.rs (3 additions, 3 deletions)

@@ -205,9 +205,9 @@ pub struct PyNormalizedString {
 #[pymethods]
 impl PyNormalizedString {
     #[new]
-    #[pyo3(text_signature = None)]
-    fn new(s: &str) -> Self {
-        NormalizedString::from(s).into()
+    #[pyo3(signature = (sequence), text_signature = "(self, sequence)")]
+    fn new(sequence: &str) -> Self {
+        NormalizedString::from(sequence).into()
     }

     /// The normalized part of the string
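Renaming `s` to `sequence` aligns the Rust constructor with the parameter name the `.pyi` docstring already documents, so the keyword form lines up too. A small sketch, assuming a build with this commit:

    from tokenizers import NormalizedString

    ns = NormalizedString(sequence="Héllo")  # keyword matches the documented name
    ns.nfd()
    ns.lowercase()
    print(ns.normalized)  # expected: "héllo" (NFD-decomposed)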
