Skip to content

Commit 9f0aa3d

Browse files
committed
remove unused schemas
1 parent c54bf4f commit 9f0aa3d

File tree

1 file changed

+6
-135
lines changed

1 file changed

+6
-135
lines changed

trl/chat_template_utils.py

Lines changed: 6 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -12,138 +12,11 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import TypeVar
15+
from transformers import PreTrainedTokenizer
1616

17-
from transformers import PreTrainedTokenizer, ProcessorMixin
18-
19-
20-
# These schemas are copy-pasted from https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
21-
cohere_schema = {
22-
"type": "object",
23-
"properties": {
24-
"role": {"const": "assistant"},
25-
"content": {"type": "string", "x-regex": r"<\|START_RESPONSE\|>(.*?)(?:<\|END_RESPONSE\|>|$)"},
26-
"thinking": {"type": "string", "x-regex": r"<\|START_THINKING\|>(.*?)(?:<\|END_THINKING\|>|$)"},
27-
"tool_calls": {
28-
"x-regex": r"<\|START_ACTION\|>(.*?)(?:<\|END_ACTION\|>|$)",
29-
"x-parser": "json",
30-
"x-parser-args": {
31-
"transform": "[*].{type: 'function', function: {name: tool_name, arguments: parameters}}"
32-
},
33-
"type": "array",
34-
"items": {
35-
"type": "object",
36-
"properties": {
37-
"type": {"const": "function"},
38-
"function": {
39-
"type": "object",
40-
"properties": {
41-
"name": {"type": "string"},
42-
"arguments": {
43-
"type": "object",
44-
"additionalProperties": {},
45-
},
46-
},
47-
},
48-
},
49-
},
50-
},
51-
},
52-
}
53-
54-
ernie_schema = {
55-
"type": "object",
56-
"properties": {
57-
"role": {"const": "assistant"},
58-
"content": {"type": "string", "x-regex": "<response>\n(.*?)\n?</response>"},
59-
"thinking": {"type": "string", "x-regex": r"(?:^|<think>\s*)(.*?)\s*<\/think>"},
60-
"tool_calls": {
61-
"x-regex-iterator": "<tool_call>(.*?)</tool_call>",
62-
"type": "array",
63-
"items": {
64-
"type": "object",
65-
"x-parser": "json",
66-
"x-parser-args": {"transform": "{type: 'function', function: @}"},
67-
"properties": {
68-
"type": {"const": "function"},
69-
"function": {
70-
"type": "object",
71-
"properties": {
72-
"name": {"type": "string"},
73-
"arguments": {
74-
"type": "object",
75-
"additionalProperties": {},
76-
},
77-
},
78-
},
79-
},
80-
},
81-
},
82-
},
83-
}
84-
85-
gpt_oss_schema = {
86-
"type": "object",
87-
"properties": {
88-
"role": {"const": "assistant"},
89-
"content": {"type": "string", "x-regex": r"<\|channel\|>final<\|message\|>(.*?)(?:<\|end\|>|$)"},
90-
"thinking": {"type": "string", "x-regex": r"<\|channel\|>analysis<\|message\|>(.*?)<\|end\|>"},
91-
"tool_calls": {
92-
"x-regex-iterator": r"<\|channel\|>commentary (to=functions\..*?<\|message\|>.*?)(?:<\|call\|>|$)",
93-
"type": "array",
94-
"items": {
95-
"type": "object",
96-
"properties": {
97-
"type": {"const": "function"},
98-
"function": {
99-
"type": "object",
100-
"properties": {
101-
"name": {"type": "string", "x-regex": r"^to=functions\.(\w+)"},
102-
"arguments": {
103-
"type": "object",
104-
"x-regex": r"<\|message\|>(.*)",
105-
"x-parser": "json",
106-
"additionalProperties": {},
107-
},
108-
},
109-
},
110-
},
111-
},
112-
},
113-
},
114-
}
115-
116-
smollm_schema = {
117-
"x-regex": r"(?:<think>\n?(?P<thinking>.+?)\n?</think>)?\s*(?:<tool_call>(?P<tool_calls>.+?)</tool_call>)?\s*(?P<content>.+?)?\s*(?:<\|im_end\|>|$)",
118-
"type": "object",
119-
"properties": {
120-
"role": {"const": "assistant"},
121-
"content": {"type": "string"},
122-
"thinking": {"type": "string"},
123-
"tool_calls": {
124-
"x-parser": "json",
125-
"x-parser-args": {"transform": "[{type: 'function', function: @}]"},
126-
"type": "array",
127-
"items": {
128-
"type": "object",
129-
"properties": {
130-
"type": {"const": "function"},
131-
"function": {
132-
"type": "object",
133-
"properties": {
134-
"name": {"type": "string"},
135-
"arguments": {
136-
"type": "object",
137-
"additionalProperties": {},
138-
},
139-
},
140-
},
141-
},
142-
},
143-
},
144-
},
145-
}
14617

18+
# Adapted and corrected versions of the schemas from:
19+
# https://github.com/huggingface/transformers/blob/main/tests/utils/test_chat_parsing_utils.py
14720
qwen3_schema = {
14821
"x-regex": r"^(?:<think>\n?(?P<reasoning_content>.+?)\n?</think>\s*)?(?P<content>.*?)(?=(?:<tool_call>|<\|im_end\|>|$))(?:<tool_call>(?P<tool_calls>.+?)</tool_call>)?\s*(?:<\|im_end\|>|$)",
14922
"type": "object",
@@ -266,10 +139,8 @@
266139
{%- endif %}
267140
{%- endif %}"""
268141

269-
TokenizerOrProcessor = TypeVar("TokenizerOrProcessor", PreTrainedTokenizer, ProcessorMixin)
270-
271142

272-
def add_response_schema(processor: TokenizerOrProcessor) -> TokenizerOrProcessor:
143+
def add_response_schema(processor: PreTrainedTokenizer) -> PreTrainedTokenizer:
273144
r"""
274145
Adds the appropriate response schema to the given tokenizer or processor based on its chat template.
275146
@@ -278,11 +149,11 @@ def add_response_schema(processor: TokenizerOrProcessor) -> TokenizerOrProcessor
278149
templates.
279150
280151
Args:
281-
processor (`TokenizerOrProcessor`):
152+
processor (`PreTrainedTokenizer`):
282153
Tokenizer or processor to which the response schema will be added.
283154
284155
Returns:
285-
`TokenizerOrProcessor`:
156+
`PreTrainedTokenizer`:
286157
Tokenizer or processor with the added response schema.
287158
288159
Examples:

0 commit comments

Comments
 (0)