33 changes: 33 additions & 0 deletions CLAUDE.md
@@ -84,6 +84,39 @@ docker-compose up -d
- `.env`: Environment variables (optional)
- Config supports both legacy format and new `mcpServers` format

### Thinking Mode Configuration
The thinking mode controls how the LLM performs extended reasoning. It can be configured per provider:

**Configuration File (JSON)**:
```json
{
"llm": {
"provider": "anthropic",
"providers": {
"anthropic": {
"model": "claude-3-5-sonnet-20241022",
"thinkingMode": "medium",
"includeThinkingInResponse": true,
}
}
}
}
```

`CalculateThinkingBudget` computes the token budget for extended thinking from the configured mode and the provider's max-tokens setting; see https://github.com/tmc/langchaingo/blob/509308ff01c13e662d5613d3aea793fabe18edd2/llms/reasoning.go#L197 for the implementation.
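
As a rough illustration only, the mapping works along these lines; the function name, signature, and fractions below are assumptions made for the sketch, and the linked `reasoning.go` is authoritative:

```go
// calculateThinkingBudget is a hypothetical sketch of a mode-to-budget mapping.
// The fractions are assumed for illustration; see langchaingo's llms/reasoning.go
// (linked above) for the real calculation.
func calculateThinkingBudget(mode string, maxTokens int) int {
	switch mode {
	case "none":
		return 0
	case "low":
		return maxTokens / 4 // assumed: small share of the budget
	case "medium":
		return maxTokens / 2 // assumed: balanced share
	case "high":
		return maxTokens * 3 / 4 // assumed: most of the budget
	default: // "auto" or unset: defer to the provider's heuristic
		return maxTokens / 2 // assumed fallback
	}
}
```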

**Environment Variables**:
- `OPENAI_THINKING_MODE`: Override thinking mode for OpenAI
- `ANTHROPIC_THINKING_MODE`: Override thinking mode for Anthropic
- `OLLAMA_THINKING_MODE`: Override thinking mode for Ollama
- `OPENAI_INCLUDE_THINKING_IN_RESPONSE`, `ANTHROPIC_INCLUDE_THINKING_IN_RESPONSE`, `OLLAMA_INCLUDE_THINKING_IN_RESPONSE`: Override whether thinking content is included in the response (boolean)

**Valid Values**:
- `none`: No extended thinking
- `low`: Minimal reasoning overhead
- `medium`: Balanced reasoning
- `high`: Deep reasoning
- `auto`: Let the model decide (default)
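
These values are consumed when the provider builds its LangChainGo call options; the sketch below mirrors `buildOptions` in `internal/llm/langchain.go` and is illustrative rather than an additional code path:

```go
package example // illustrative sketch only

import "github.com/tmc/langchaingo/llms"

// thinkingModeOption shows how a configured thinking-mode string becomes a
// LangChainGo call option, defaulting to auto when unset.
func thinkingModeOption(configured string) llms.CallOption {
	mode := llms.ThinkingMode(configured) // e.g. "medium"
	if configured == "" {
		mode = llms.ThinkingModeAuto // default when not configured
	}
	return llms.WithThinkingMode(mode)
}
```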

### RAG Configuration
RAG can be enabled via the LLM provider config with `rag_enabled: true`. It supports JSON-based storage and OpenAI vector stores.

71 changes: 59 additions & 12 deletions internal/config/config.go
@@ -59,11 +59,13 @@ type LLMConfig struct {

// LLMProviderConfig contains provider-specific settings
type LLMProviderConfig struct {
Model string `json:"model"`
APIKey string `json:"apiKey,omitempty"`
BaseURL string `json:"baseUrl,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxTokens int `json:"maxTokens,omitempty"`
Model string `json:"model"`
APIKey string `json:"apiKey,omitempty"`
BaseURL string `json:"baseUrl,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
MaxTokens int `json:"maxTokens,omitempty"`
ThinkingMode string `json:"thinkingMode,omitempty"` // Thinking mode: none, low, medium, high, auto (default: auto)
IncludeThinkingInResponse bool `json:"includeThinkingInResponse,omitempty"` // Include thinking content in response (default: false)
}

// MCPServerConfig contains MCP server configuration
@@ -224,23 +226,44 @@ func (c *Config) applyLLMDefaults() {
// Set default provider configurations if they don't exist
if _, exists := c.LLM.Providers[ProviderOpenAI]; !exists {
c.LLM.Providers[ProviderOpenAI] = LLMProviderConfig{
Model: "gpt-4o",
Temperature: 0.7,
Model: "gpt-4o",
Temperature: 0.7,
ThinkingMode: "auto",
}
} else {
// Apply default thinking mode if not set
if providerConfig := c.LLM.Providers[ProviderOpenAI]; providerConfig.ThinkingMode == "" {
providerConfig.ThinkingMode = "auto"
c.LLM.Providers[ProviderOpenAI] = providerConfig
}
}

if _, exists := c.LLM.Providers[ProviderAnthropic]; !exists {
c.LLM.Providers[ProviderAnthropic] = LLMProviderConfig{
Model: "claude-3-5-sonnet-20241022",
Temperature: 0.7,
Model: "claude-3-5-sonnet-20241022",
Temperature: 0.7,
ThinkingMode: "auto",
}
} else {
// Apply default thinking mode if not set
if providerConfig := c.LLM.Providers[ProviderAnthropic]; providerConfig.ThinkingMode == "" {
providerConfig.ThinkingMode = "auto"
c.LLM.Providers[ProviderAnthropic] = providerConfig
}
}

if _, exists := c.LLM.Providers[ProviderOllama]; !exists {
c.LLM.Providers[ProviderOllama] = LLMProviderConfig{
Model: "llama3",
BaseURL: "http://localhost:11434",
Temperature: 0.7,
Model: "llama3",
BaseURL: "http://localhost:11434",
Temperature: 0.7,
ThinkingMode: "auto",
}
} else {
// Apply default thinking mode if not set
if providerConfig := c.LLM.Providers[ProviderOllama]; providerConfig.ThinkingMode == "" {
providerConfig.ThinkingMode = "auto"
c.LLM.Providers[ProviderOllama] = providerConfig
}
}
}
@@ -398,6 +421,14 @@ func (c *Config) ApplyEnvironmentVariables() {
if model := os.Getenv("OPENAI_MODEL"); model != "" {
openaiConfig.Model = model
}
if thinkingMode := os.Getenv("OPENAI_THINKING_MODE"); thinkingMode != "" {
openaiConfig.ThinkingMode = thinkingMode
}
if includeThinking := os.Getenv("OPENAI_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
if val, err := strconv.ParseBool(includeThinking); err == nil {
openaiConfig.IncludeThinkingInResponse = val
}
}
c.LLM.Providers[ProviderOpenAI] = openaiConfig
}

@@ -409,6 +440,14 @@ func (c *Config) ApplyEnvironmentVariables() {
if model := os.Getenv("ANTHROPIC_MODEL"); model != "" {
anthropicConfig.Model = model
}
if thinkingMode := os.Getenv("ANTHROPIC_THINKING_MODE"); thinkingMode != "" {
anthropicConfig.ThinkingMode = thinkingMode
}
if includeThinking := os.Getenv("ANTHROPIC_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
if val, err := strconv.ParseBool(includeThinking); err == nil {
anthropicConfig.IncludeThinkingInResponse = val
}
}
c.LLM.Providers[ProviderAnthropic] = anthropicConfig
}

@@ -420,6 +459,14 @@ func (c *Config) ApplyEnvironmentVariables() {
if model := os.Getenv("OLLAMA_MODEL"); model != "" {
ollamaConfig.Model = model
}
if thinkingMode := os.Getenv("OLLAMA_THINKING_MODE"); thinkingMode != "" {
ollamaConfig.ThinkingMode = thinkingMode
}
if includeThinking := os.Getenv("OLLAMA_INCLUDE_THINKING_IN_RESPONSE"); includeThinking != "" {
if val, err := strconv.ParseBool(includeThinking); err == nil {
ollamaConfig.IncludeThinkingInResponse = val
}
}
c.LLM.Providers[ProviderOllama] = ollamaConfig
}
// Observability overrides
6 changes: 6 additions & 0 deletions internal/handlers/llm_mcp_bridge.go
@@ -602,6 +602,12 @@ func (b *LLMMCPBridge) CallLLM(prompt, contextHistory string) (*llms.ContentChoi
if providerConfig, exists := b.cfg.LLM.Providers[providerName]; exists {
options.Temperature = providerConfig.Temperature
options.MaxTokens = providerConfig.MaxTokens
// Set thinking mode from config (will be passed to buildOptions in LangChain provider)
if providerConfig.ThinkingMode != "" {
options.ThinkingMode = llms.ThinkingMode(providerConfig.ThinkingMode)
}
// Set include thinking in response from config
options.IncludeThinkingInResponse = providerConfig.IncludeThinkingInResponse
}
}

40 changes: 38 additions & 2 deletions internal/llm/langchain.go
@@ -136,8 +136,35 @@ func (p *LangChainProvider) GenerateCompletion(ctx context.Context, prompt strin
if len(choices) < 1 {
return nil, fmt.Errorf("empty response from model")
}
c1 := choices[0]
return c1, nil
var contentIdx int // index of the choice whose content is returned
var thinkingContent string
var thinkingTokens int

for i, choice := range choices {
if choice.Content != "" {
contentIdx = i
}
if choice.GenerationInfo != nil {
if tc, ok := choice.GenerationInfo["ThinkingContent"].(string); ok && tc != "" {
thinkingContent = tc
}
// Extract thinking token usage
usage := llms.ExtractThinkingTokens(choice.GenerationInfo)
if usage != nil && usage.ThinkingTokens > 0 {

> Reviewer note: check whether this thinking token usage can also be included in the Langfuse trace.

thinkingTokens = usage.ThinkingTokens
}
}
}
p.logger.DebugKV("Thinking content", "content", thinkingContent, "tokens", thinkingTokens)

// If configured to include thinking content in response, prepend it to the content
if options.IncludeThinkingInResponse && thinkingContent != "" {
result := choices[contentIdx]
result.Content = "## Thinking Process\n\n" + thinkingContent + "\n\n## Response\n\n" + result.Content
return result, nil
}

return choices[contentIdx], nil
}

// GenerateChatCompletion generates a chat completion using LangChainGo
@@ -320,6 +347,15 @@ func (p *LangChainProvider) buildOptions(options ProviderOptions) []llms.CallOpt
p.logger.DebugKV("Adding functions for tools", "tools", len(options.Tools))
}

// ThinkingMode: Apply if specified, otherwise use default
// https://github.com/tmc/langchaingo/blob/main/llms/reasoning.go
thinkingMode := options.ThinkingMode
if thinkingMode == "" {
thinkingMode = llms.ThinkingModeAuto // Default to Auto for better reasoning
}
callOptions = append(callOptions, llms.WithThinkingMode(thinkingMode))
p.logger.DebugKV("Adding ThinkingMode option", "mode", thinkingMode)

// Note: options.TargetProvider is handled during factory creation, not here.

return callOptions
12 changes: 7 additions & 5 deletions internal/llm/provider.go
@@ -86,11 +86,13 @@ type RequestMessage struct {

// ProviderOptions contains options for LLM requests
type ProviderOptions struct {
Model string // Model to use (specific model name, e.g., gpt-4o)
Temperature float64 // Temperature for response generation (0-1)
MaxTokens int // Maximum number of tokens to generate
TargetProvider string // For gateway providers: specifies the underlying provider (e.g., "openai", "ollama")
Tools []llms.Tool
Model string // Model to use (specific model name, e.g., gpt-4o)
Temperature float64 // Temperature for response generation (0-1)
MaxTokens int // Maximum number of tokens to generate
TargetProvider string // For gateway providers: specifies the underlying provider (e.g., "openai", "ollama")
Tools []llms.Tool // Tools available for the model to use
ThinkingMode llms.ThinkingMode // Thinking mode for extended reasoning (none, low, medium, high, auto)
IncludeThinkingInResponse bool // Include thinking content in response (default: false)
}

// LLMProvider defines the interface for language model providers