diff --git a/CLAUDE.md b/CLAUDE.md
index 9044502..f4089f2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -84,6 +84,39 @@ docker-compose up -d
 - `.env`: Environment variables (optional)
 - Config supports both legacy format and new `mcpServers` format
 
+### Thinking Mode Configuration
+Thinking mode controls how much extended reasoning the LLM performs. It can be configured per provider:
+
+**Configuration File (JSON)**:
+```json
+{
+  "llm": {
+    "provider": "anthropic",
+    "providers": {
+      "anthropic": {
+        "model": "claude-3-5-sonnet-20241022",
+        "thinkingMode": "medium",
+        "includeThinkingInResponse": true
+      }
+    }
+  }
+}
+```
+
+LangChainGo's `CalculateThinkingBudget` derives the thinking token budget from the configured mode and the request's max tokens; see https://github.com/tmc/langchaingo/blob/509308ff01c13e662d5613d3aea793fabe18edd2/llms/reasoning.go#L197.
+
+**Environment Variables**:
+- `OPENAI_THINKING_MODE`: Override thinking mode for OpenAI
+- `ANTHROPIC_THINKING_MODE`: Override thinking mode for Anthropic
+- `OLLAMA_THINKING_MODE`: Override thinking mode for Ollama
+- `OPENAI_INCLUDE_THINKING_IN_RESPONSE`, `ANTHROPIC_INCLUDE_THINKING_IN_RESPONSE`, `OLLAMA_INCLUDE_THINKING_IN_RESPONSE`: Override whether thinking content is included in the response (boolean)
+
+**Valid Values**:
+- `none`: No extended thinking
+- `low`: Minimal reasoning overhead
+- `medium`: Balanced reasoning
+- `high`: Deep reasoning
+- `auto`: Let the model decide (default)
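+
+For example, to request deep reasoning from the Anthropic provider and surface the thinking content in replies, export the overrides below in the shell (or place them in `.env`). The values are illustrative; the variable names match those read in `internal/config/config.go`:
+
+```bash
+export ANTHROPIC_THINKING_MODE=high
+export ANTHROPIC_INCLUDE_THINKING_IN_RESPONSE=true
+```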
+
 ### RAG Configuration
 
 RAG can be enabled via LLM provider config with `rag_enabled: true`. Supports JSON-based storage and OpenAI vector stores.
diff --git a/internal/config/config.go b/internal/config/config.go
index 2e7ba91..971aea0 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -59,11 +59,13 @@ type LLMConfig struct {
 
 // LLMProviderConfig contains provider-specific settings
 type LLMProviderConfig struct {
-	Model       string  `json:"model"`
-	APIKey      string  `json:"apiKey,omitempty"`
-	BaseURL     string  `json:"baseUrl,omitempty"`
-	Temperature float64 `json:"temperature,omitempty"`
-	MaxTokens   int     `json:"maxTokens,omitempty"`
+	Model                     string  `json:"model"`
+	APIKey                    string  `json:"apiKey,omitempty"`
+	BaseURL                   string  `json:"baseUrl,omitempty"`
+	Temperature               float64 `json:"temperature,omitempty"`
+	MaxTokens                 int     `json:"maxTokens,omitempty"`
+	ThinkingMode              string  `json:"thinkingMode,omitempty"`              // Thinking mode: none, low, medium, high, auto (default: auto)
+	IncludeThinkingInResponse bool    `json:"includeThinkingInResponse,omitempty"` // Include thinking content in response (default: false)
 }
 
 // MCPServerConfig contains MCP server configuration
@@ -224,23 +226,44 @@ func (c *Config) applyLLMDefaults() {
 	// Set default provider configurations if they don't exist
 	if _, exists := c.LLM.Providers[ProviderOpenAI]; !exists {
 		c.LLM.Providers[ProviderOpenAI] = LLMProviderConfig{
-			Model:       "gpt-4o",
-			Temperature: 0.7,
+			Model:        "gpt-4o",
+			Temperature:  0.7,
+			ThinkingMode: "auto",
+		}
+	} else {
+		// Apply default thinking mode if not set
+		if providerConfig := c.LLM.Providers[ProviderOpenAI]; providerConfig.ThinkingMode == "" {
+			providerConfig.ThinkingMode = "auto"
+			c.LLM.Providers[ProviderOpenAI] = providerConfig
 		}
 	}
 	if _, exists := c.LLM.Providers[ProviderAnthropic]; !exists {
 		c.LLM.Providers[ProviderAnthropic] = LLMProviderConfig{
-			Model:       "claude-3-5-sonnet-20241022",
-			Temperature: 0.7,
+			Model:        "claude-3-5-sonnet-20241022",
+			Temperature:  0.7,
+			ThinkingMode: "auto",
+		}
+	} else {
+		// Apply default thinking mode if not set
+		if providerConfig := c.LLM.Providers[ProviderAnthropic]; providerConfig.ThinkingMode == "" {
+			providerConfig.ThinkingMode = "auto"
+			c.LLM.Providers[ProviderAnthropic] = providerConfig
 		}
 	}
 	if _, exists := c.LLM.Providers[ProviderOllama]; !exists {
 		c.LLM.Providers[ProviderOllama] = LLMProviderConfig{
-			Model:       "llama3",
-			BaseURL:     "http://localhost:11434",
-			Temperature: 0.7,
+			Model:        "llama3",
+			BaseURL:      "http://localhost:11434",
+			Temperature:  0.7,
+			ThinkingMode: "auto",
+		}
+	} else {
+		// Apply default thinking mode if not set
+		if providerConfig := c.LLM.Providers[ProviderOllama]; providerConfig.ThinkingMode == "" {
+			providerConfig.ThinkingMode = "auto"
+			c.LLM.Providers[ProviderOllama] = providerConfig
 		}
 	}
 }
@@ -398,6 +421,14 @@ func (c *Config) ApplyEnvironmentVariables() {
 		if model := os.Getenv("OPENAI_MODEL"); model != "" {
 			openaiConfig.Model = model
 		}
+		if thinkingMode := os.Getenv("OPENAI_THINKING_MODE"); thinkingMode != "" {
+			openaiConfig.ThinkingMode = thinkingMode
+		}
+		if includeThinking := os.Getenv("OPENAI_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
+			if val, err := strconv.ParseBool(includeThinking); err == nil {
+				openaiConfig.IncludeThinkingInResponse = val
+			}
+		}
 		c.LLM.Providers[ProviderOpenAI] = openaiConfig
 	}
 
@@ -409,6 +440,14 @@ func (c *Config) ApplyEnvironmentVariables() {
 		if model := os.Getenv("ANTHROPIC_MODEL"); model != "" {
 			anthropicConfig.Model = model
 		}
+		if thinkingMode := os.Getenv("ANTHROPIC_THINKING_MODE"); thinkingMode != "" {
+			anthropicConfig.ThinkingMode = thinkingMode
+		}
+		if includeThinking := os.Getenv("ANTHROPIC_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
+			if val, err := strconv.ParseBool(includeThinking); err == nil {
+				anthropicConfig.IncludeThinkingInResponse = val
+			}
+		}
 		c.LLM.Providers[ProviderAnthropic] = anthropicConfig
 	}
 
@@ -420,6 +459,14 @@ func (c *Config) ApplyEnvironmentVariables() {
 		if model := os.Getenv("OLLAMA_MODEL"); model != "" {
 			ollamaConfig.Model = model
 		}
+		if thinkingMode := os.Getenv("OLLAMA_THINKING_MODE"); thinkingMode != "" {
+			ollamaConfig.ThinkingMode = thinkingMode
+		}
+		if includeThinking := os.Getenv("OLLAMA_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
+			if val, err := strconv.ParseBool(includeThinking); err == nil {
+				ollamaConfig.IncludeThinkingInResponse = val
+			}
+		}
 		c.LLM.Providers[ProviderOllama] = ollamaConfig
 	}
 	// Observability overrides
diff --git a/internal/handlers/llm_mcp_bridge.go b/internal/handlers/llm_mcp_bridge.go
index 0ad9f87..32c6401 100644
--- a/internal/handlers/llm_mcp_bridge.go
+++ b/internal/handlers/llm_mcp_bridge.go
@@ -602,6 +602,12 @@ func (b *LLMMCPBridge) CallLLM(prompt, contextHistory string) (*llms.ContentChoi
 	if providerConfig, exists := b.cfg.LLM.Providers[providerName]; exists {
 		options.Temperature = providerConfig.Temperature
 		options.MaxTokens = providerConfig.MaxTokens
+		// Set thinking mode from config (will be passed to buildOptions in LangChain provider)
+		if providerConfig.ThinkingMode != "" {
+			options.ThinkingMode = llms.ThinkingMode(providerConfig.ThinkingMode)
+		}
+		// Set include thinking in response from config
+		options.IncludeThinkingInResponse = providerConfig.IncludeThinkingInResponse
 	}
 }
 
diff --git a/internal/llm/langchain.go b/internal/llm/langchain.go
index 3bd2848..e94102a 100644
--- a/internal/llm/langchain.go
+++ b/internal/llm/langchain.go
@@ -136,8 +136,35 @@ func (p *LangChainProvider) GenerateCompletion(ctx context.Context, prompt strin
 	if len(choices) < 1 {
 		return nil, fmt.Errorf("empty response from model")
 	}
-	c1 := choices[0]
-	return c1, nil
+	var content int
+	var thinkingContent string
+	var thinkingTokens int
+
+	for i, choice := range choices {
+		if choice.Content != "" {
+			content = i
+		}
+		if choice.GenerationInfo != nil {
+			if tc, ok := choice.GenerationInfo["ThinkingContent"].(string); ok && tc != "" {
+				thinkingContent = tc
+			}
+			// Extract thinking token usage
+			usage := llms.ExtractThinkingTokens(choice.GenerationInfo)
+			if usage != nil && usage.ThinkingTokens > 0 {
+				thinkingTokens = usage.ThinkingTokens
+			}
+		}
+	}
+	p.logger.DebugKV("Thinking content", "content", thinkingContent, "tokens", thinkingTokens)
+
+	// If configured to include thinking content in response, prepend it to the content
+	if options.IncludeThinkingInResponse && thinkingContent != "" {
+		result := choices[content]
+		result.Content = "## Thinking Process\n\n" + thinkingContent + "\n\n## Response\n\n" + result.Content
+		return result, nil
+	}
+
+	return choices[content], nil
 }
 
 // GenerateChatCompletion generates a chat completion using LangChainGo
@@ -320,6 +347,15 @@ func (p *LangChainProvider) buildOptions(options ProviderOptions) []llms.CallOpt
 		p.logger.DebugKV("Adding functions for tools", "tools", len(options.Tools))
 	}
 
+	// ThinkingMode: Apply if specified, otherwise use default
+	// https://github.com/tmc/langchaingo/blob/main/llms/reasoning.go
+	thinkingMode := options.ThinkingMode
+	if thinkingMode == "" {
+		thinkingMode = llms.ThinkingModeAuto // Default to Auto for better reasoning
+	}
+	callOptions = append(callOptions, llms.WithThinkingMode(thinkingMode))
+	p.logger.DebugKV("Adding ThinkingMode option", "mode", thinkingMode)
+
 	// Note: options.TargetProvider is handled during factory creation, not here.
 
 	return callOptions
diff --git a/internal/llm/provider.go b/internal/llm/provider.go
index c165bb9..fd40be6 100644
--- a/internal/llm/provider.go
+++ b/internal/llm/provider.go
@@ -86,11 +86,13 @@ type RequestMessage struct {
 
 // ProviderOptions contains options for LLM requests
 type ProviderOptions struct {
-	Model          string  // Model to use (specific model name, e.g., gpt-4o)
-	Temperature    float64 // Temperature for response generation (0-1)
-	MaxTokens      int     // Maximum number of tokens to generate
-	TargetProvider string  // For gateway providers: specifies the underlying provider (e.g., "openai", "ollama")
-	Tools          []llms.Tool
+	Model                     string            // Model to use (specific model name, e.g., gpt-4o)
+	Temperature               float64           // Temperature for response generation (0-1)
+	MaxTokens                 int               // Maximum number of tokens to generate
+	TargetProvider            string            // For gateway providers: specifies the underlying provider (e.g., "openai", "ollama")
+	Tools                     []llms.Tool       // Tools available for the model to use
+	ThinkingMode              llms.ThinkingMode // Thinking mode for extended reasoning (none, low, medium, high, auto)
+	IncludeThinkingInResponse bool              // Include thinking content in response (default: false)
 }
 
 // LLMProvider defines the interface for language model providers