1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15- from typing import TypeVar
15+ from transformers import PreTrainedTokenizer
1616
17- from transformers import PreTrainedTokenizer , ProcessorMixin
18-
19-
20- # These schemas are copy-pasted from https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
21- cohere_schema = {
22- "type" : "object" ,
23- "properties" : {
24- "role" : {"const" : "assistant" },
25- "content" : {"type" : "string" , "x-regex" : r"<\|START_RESPONSE\|>(.*?)(?:<\|END_RESPONSE\|>|$)" },
26- "thinking" : {"type" : "string" , "x-regex" : r"<\|START_THINKING\|>(.*?)(?:<\|END_THINKING\|>|$)" },
27- "tool_calls" : {
28- "x-regex" : r"<\|START_ACTION\|>(.*?)(?:<\|END_ACTION\|>|$)" ,
29- "x-parser" : "json" ,
30- "x-parser-args" : {
31- "transform" : "[*].{type: 'function', function: {name: tool_name, arguments: parameters}}"
32- },
33- "type" : "array" ,
34- "items" : {
35- "type" : "object" ,
36- "properties" : {
37- "type" : {"const" : "function" },
38- "function" : {
39- "type" : "object" ,
40- "properties" : {
41- "name" : {"type" : "string" },
42- "arguments" : {
43- "type" : "object" ,
44- "additionalProperties" : {},
45- },
46- },
47- },
48- },
49- },
50- },
51- },
52- }
53-
54- ernie_schema = {
55- "type" : "object" ,
56- "properties" : {
57- "role" : {"const" : "assistant" },
58- "content" : {"type" : "string" , "x-regex" : "<response>\n (.*?)\n ?</response>" },
59- "thinking" : {"type" : "string" , "x-regex" : r"(?:^|<think>\s*)(.*?)\s*<\/think>" },
60- "tool_calls" : {
61- "x-regex-iterator" : "<tool_call>(.*?)</tool_call>" ,
62- "type" : "array" ,
63- "items" : {
64- "type" : "object" ,
65- "x-parser" : "json" ,
66- "x-parser-args" : {"transform" : "{type: 'function', function: @}" },
67- "properties" : {
68- "type" : {"const" : "function" },
69- "function" : {
70- "type" : "object" ,
71- "properties" : {
72- "name" : {"type" : "string" },
73- "arguments" : {
74- "type" : "object" ,
75- "additionalProperties" : {},
76- },
77- },
78- },
79- },
80- },
81- },
82- },
83- }
84-
85- gpt_oss_schema = {
86- "type" : "object" ,
87- "properties" : {
88- "role" : {"const" : "assistant" },
89- "content" : {"type" : "string" , "x-regex" : r"<\|channel\|>final<\|message\|>(.*?)(?:<\|end\|>|$)" },
90- "thinking" : {"type" : "string" , "x-regex" : r"<\|channel\|>analysis<\|message\|>(.*?)<\|end\|>" },
91- "tool_calls" : {
92- "x-regex-iterator" : r"<\|channel\|>commentary (to=functions\..*?<\|message\|>.*?)(?:<\|call\|>|$)" ,
93- "type" : "array" ,
94- "items" : {
95- "type" : "object" ,
96- "properties" : {
97- "type" : {"const" : "function" },
98- "function" : {
99- "type" : "object" ,
100- "properties" : {
101- "name" : {"type" : "string" , "x-regex" : r"^to=functions\.(\w+)" },
102- "arguments" : {
103- "type" : "object" ,
104- "x-regex" : r"<\|message\|>(.*)" ,
105- "x-parser" : "json" ,
106- "additionalProperties" : {},
107- },
108- },
109- },
110- },
111- },
112- },
113- },
114- }
115-
116- smollm_schema = {
117- "x-regex" : r"(?:<think>\n?(?P<thinking>.+?)\n?</think>)?\s*(?:<tool_call>(?P<tool_calls>.+?)</tool_call>)?\s*(?P<content>.+?)?\s*(?:<\|im_end\|>|$)" ,
118- "type" : "object" ,
119- "properties" : {
120- "role" : {"const" : "assistant" },
121- "content" : {"type" : "string" },
122- "thinking" : {"type" : "string" },
123- "tool_calls" : {
124- "x-parser" : "json" ,
125- "x-parser-args" : {"transform" : "[{type: 'function', function: @}]" },
126- "type" : "array" ,
127- "items" : {
128- "type" : "object" ,
129- "properties" : {
130- "type" : {"const" : "function" },
131- "function" : {
132- "type" : "object" ,
133- "properties" : {
134- "name" : {"type" : "string" },
135- "arguments" : {
136- "type" : "object" ,
137- "additionalProperties" : {},
138- },
139- },
140- },
141- },
142- },
143- },
144- },
145- }
14617
18+ # Adapted and corrected versions of the schemas from:
19+ # https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
14720qwen3_schema = {
14821 "x-regex" : r"^(?:<think>\n?(?P<reasoning_content>.+?)\n?</think>\s*)?(?P<content>.*?)(?=(?:<tool_call>|<\|im_end\|>|$))(?:<tool_call>(?P<tool_calls>.+?)</tool_call>)?\s*(?:<\|im_end\|>|$)" ,
14922 "type" : "object" ,
266139 {%- endif %}
267140{%- endif %}"""
268141
269- TokenizerOrProcessor = TypeVar ("TokenizerOrProcessor" , PreTrainedTokenizer , ProcessorMixin )
270-
271142
272- def add_response_schema (processor : TokenizerOrProcessor ) -> TokenizerOrProcessor :
143+ def add_response_schema (processor : PreTrainedTokenizer ) -> PreTrainedTokenizer :
273144 r"""
274145 Adds the appropriate response schema to the given tokenizer or processor based on its chat template.
275146
@@ -278,11 +149,11 @@ def add_response_schema(processor: TokenizerOrProcessor) -> TokenizerOrProcessor
278149 templates.
279150
280151 Args:
281- processor (`TokenizerOrProcessor `):
152+ processor (`PreTrainedTokenizer`):
282153 Tokenizer or processor to which the response schema will be added.
283154
284155 Returns:
285- `TokenizerOrProcessor `:
156+ `PreTrainedTokenizer`:
286157 Tokenizer or processor with the added response schema.
287158
288159 Examples:
0 commit comments