
Commit 21c84f4

feat(function): Add tool streaming, XML Tool Call Parsing Support (#7865)
* feat(function): Add XML Tool Call Parsing Support

  Extend the function parsing system in LocalAI to support XML-style tool calls, similar to how JSON tool calls are currently parsed. This will allow models that return XML format (like `<tool_call><function=name><parameter=key>value</parameter></function></tool_call>`) to be properly parsed alongside text content.

* thinking before tool calls, more strict support for corner cases with no tools
* Support streaming tools
* Iterative JSON
* Iterative parsing
* Consume JSON marker
* Fixup
* add tests
* Fix pending TODOs
* Don't run other parsing with ParseRegex

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 9d3da0b commit 21c84f4
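
For context, the tag layout named in the commit message maps onto an OpenAI-style tool call roughly as sketched below. The `get_weather` function and its `city` parameter are invented for illustration; only the surrounding tags come from the commit message.

```go
package sample

// rawToolCall shows the XML shape the new parser targets. The tags follow
// the commit message above; get_weather and city are hypothetical names
// used only for illustration.
const rawToolCall = `<tool_call>
<function=get_weather>
<parameter=city>Rome</parameter>
</function>
</tool_call>`

// Once parsed, LocalAI should surface this as an OpenAI-style tool call:
// a function named "get_weather" with arguments `{"city":"Rome"}` inside
// the streamed chat.completion.chunk deltas.
```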

File tree

6 files changed: +5401 −9 lines changed


AGENTS.md

Lines changed: 46 additions & 0 deletions
```diff
@@ -77,3 +77,49 @@ When fixing compilation errors after upstream changes:
 - HTTP server uses `server_routes` with HTTP handlers
 - Both use the same `server_context` and task queue infrastructure
 - gRPC methods: `LoadModel`, `Predict`, `PredictStream`, `Embedding`, `Rerank`, `TokenizeString`, `GetMetrics`, `Health`
+
+## Tool Call Parsing Maintenance
+
+When working on JSON/XML tool call parsing functionality, always check llama.cpp for reference implementation and updates:
+
+### Checking for XML Parsing Changes
+
+1. **Review XML Format Definitions**: Check `llama.cpp/common/chat-parser-xml-toolcall.h` for `xml_tool_call_format` struct changes
+2. **Review Parsing Logic**: Check `llama.cpp/common/chat-parser-xml-toolcall.cpp` for parsing algorithm updates
+3. **Review Format Presets**: Check `llama.cpp/common/chat-parser.cpp` for new XML format presets (search for `xml_tool_call_format form`)
+4. **Review Model Lists**: Check `llama.cpp/common/chat.h` for `COMMON_CHAT_FORMAT_*` enum values that use XML parsing:
+   - `COMMON_CHAT_FORMAT_GLM_4_5`
+   - `COMMON_CHAT_FORMAT_MINIMAX_M2`
+   - `COMMON_CHAT_FORMAT_KIMI_K2`
+   - `COMMON_CHAT_FORMAT_QWEN3_CODER_XML`
+   - `COMMON_CHAT_FORMAT_APRIEL_1_5`
+   - `COMMON_CHAT_FORMAT_XIAOMI_MIMO`
+   - Any new formats added
+
+### Model Configuration Options
+
+Always check `llama.cpp` for new model configuration options that should be supported in LocalAI:
+
+1. **Check Server Context**: Review `llama.cpp/tools/server/server-context.cpp` for new parameters
+2. **Check Chat Params**: Review `llama.cpp/common/chat.h` for `common_chat_params` struct changes
+3. **Check Server Options**: Review `llama.cpp/tools/server/server.cpp` for command-line argument changes
+4. **Examples of options to check**:
+   - `ctx_shift` - Context shifting support
+   - `parallel_tool_calls` - Parallel tool calling
+   - `reasoning_format` - Reasoning format options
+   - Any new flags or parameters
+
+### Implementation Guidelines
+
+1. **Feature Parity**: Always aim for feature parity with llama.cpp's implementation
+2. **Test Coverage**: Add tests for new features matching llama.cpp's behavior
+3. **Documentation**: Update relevant documentation when adding new formats or options
+4. **Backward Compatibility**: Ensure changes don't break existing functionality
+
+### Files to Monitor
+
+- `llama.cpp/common/chat-parser-xml-toolcall.h` - Format definitions
+- `llama.cpp/common/chat-parser-xml-toolcall.cpp` - Parsing logic
+- `llama.cpp/common/chat-parser.cpp` - Format presets and model-specific handlers
+- `llama.cpp/common/chat.h` - Format enums and parameter structures
+- `llama.cpp/tools/server/server-context.cpp` - Server configuration options
```
core/http/endpoints/openai/chat.go

Lines changed: 103 additions & 2 deletions
```diff
@@ -66,10 +66,111 @@ func ChatEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator
     }
     processTools := func(noAction string, prompt string, req *schema.OpenAIRequest, config *config.ModelConfig, loader *model.ModelLoader, responses chan schema.OpenAIResponse, extraUsage bool) error {
         result := ""
+        lastEmittedCount := 0
         _, tokenUsage, err := ComputeChoices(req, prompt, config, cl, startupOptions, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
             result += s
-            // TODO: Change generated BNF grammar to be compliant with the schema so we can
-            // stream the result token by token here.
+            // Try incremental XML parsing for streaming support using iterative parser
+            // This allows emitting partial tool calls as they're being generated
+            cleanedResult := functions.CleanupLLMResult(result, config.FunctionsConfig)
+
+            // Determine XML format from config
+            var xmlFormat *functions.XMLToolCallFormat
+            if config.FunctionsConfig.XMLFormat != nil {
+                xmlFormat = config.FunctionsConfig.XMLFormat
+            } else if config.FunctionsConfig.XMLFormatPreset != "" {
+                xmlFormat = functions.GetXMLFormatPreset(config.FunctionsConfig.XMLFormatPreset)
+            }
+
+            // Use iterative parser for streaming (partial parsing enabled)
+            // Try XML parsing first
+            partialResults, parseErr := functions.ParseXMLIterative(cleanedResult, xmlFormat, true)
+            if parseErr == nil && len(partialResults) > 0 {
+                // Emit new XML tool calls that weren't emitted before
+                if len(partialResults) > lastEmittedCount {
+                    for i := lastEmittedCount; i < len(partialResults); i++ {
+                        toolCall := partialResults[i]
+                        initialMessage := schema.OpenAIResponse{
+                            ID:      id,
+                            Created: created,
+                            Model:   req.Model,
+                            Choices: []schema.Choice{{
+                                Delta: &schema.Message{
+                                    Role: "assistant",
+                                    ToolCalls: []schema.ToolCall{
+                                        {
+                                            Index: i,
+                                            ID:    id,
+                                            Type:  "function",
+                                            FunctionCall: schema.FunctionCall{
+                                                Name: toolCall.Name,
+                                            },
+                                        },
+                                    },
+                                },
+                                Index:        0,
+                                FinishReason: nil,
+                            }},
+                            Object: "chat.completion.chunk",
+                        }
+                        select {
+                        case responses <- initialMessage:
+                        default:
+                        }
+                    }
+                    lastEmittedCount = len(partialResults)
+                }
+            } else {
+                // Try JSON tool call parsing for streaming
+                // Check if the result looks like JSON tool calls
+                jsonResults, jsonErr := functions.ParseJSONIterative(cleanedResult, true)
+                if jsonErr == nil && len(jsonResults) > 0 {
+                    // Check if these are tool calls (have "name" and optionally "arguments")
+                    for _, jsonObj := range jsonResults {
+                        if name, ok := jsonObj["name"].(string); ok && name != "" {
+                            // This looks like a tool call
+                            args := "{}"
+                            if argsVal, ok := jsonObj["arguments"]; ok {
+                                if argsStr, ok := argsVal.(string); ok {
+                                    args = argsStr
+                                } else {
+                                    argsBytes, _ := json.Marshal(argsVal)
+                                    args = string(argsBytes)
+                                }
+                            }
+                            // Emit tool call
+                            initialMessage := schema.OpenAIResponse{
+                                ID:      id,
+                                Created: created,
+                                Model:   req.Model,
+                                Choices: []schema.Choice{{
+                                    Delta: &schema.Message{
+                                        Role: "assistant",
+                                        ToolCalls: []schema.ToolCall{
+                                            {
+                                                Index: lastEmittedCount,
+                                                ID:    id,
+                                                Type:  "function",
+                                                FunctionCall: schema.FunctionCall{
+                                                    Name:      name,
+                                                    Arguments: args,
+                                                },
+                                            },
+                                        },
+                                    },
+                                    Index:        0,
+                                    FinishReason: nil,
+                                }},
+                                Object: "chat.completion.chunk",
+                            }
+                            select {
+                            case responses <- initialMessage:
+                            default:
+                            }
+                            lastEmittedCount++
+                        }
+                    }
+                }
+            }
             return true
         })
         if err != nil {
```
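
A usage sketch of the two iterative parsers, based only on the call sites visible in this diff; the import path, the preset name, and the exact result types are assumptions, flagged in the comments:

```go
package main

import (
	"encoding/json"
	"fmt"

	// Assumed import path for LocalAI's functions package.
	"github.com/mudler/LocalAI/pkg/functions"
)

func main() {
	// Accumulated (still incomplete) model output at some point mid-stream.
	partial := `<tool_call><function=get_weather><parameter=city>Rome`

	// The handler passes (accumulated text, format, partial=true).
	// "qwen3_coder" is a guessed preset name; check GetXMLFormatPreset
	// for the names LocalAI actually registers.
	format := functions.GetXMLFormatPreset("qwen3_coder")
	calls, err := functions.ParseXMLIterative(partial, format, true)
	if err == nil {
		for _, c := range calls {
			// The diff reads a Name field from each partial result.
			fmt.Println("partial tool call:", c.Name)
		}
	}

	// JSON fallback, mirroring the handler's logic: objects with a "name"
	// key are treated as tool calls; "arguments" is used verbatim when it
	// is already a string and re-marshaled otherwise.
	jsonCalls, err := functions.ParseJSONIterative(`{"name":"get_weather","arguments":{"city":"Rome"}}`, true)
	if err == nil {
		for _, obj := range jsonCalls {
			name, ok := obj["name"].(string)
			if !ok || name == "" {
				continue
			}
			args := "{}"
			if v, ok := obj["arguments"]; ok {
				if s, ok := v.(string); ok {
					args = s
				} else if b, err := json.Marshal(v); err == nil {
					args = string(b)
				}
			}
			fmt.Println("tool call:", name, args)
		}
	}
}
```

Two details of the handler worth noting: the non-blocking `select` around `responses <- initialMessage` silently drops a chunk when the channel is full, and `lastEmittedCount` is what prevents re-emitting the same tool call on every subsequent token.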
