Merge pull request #317 from mason5052/codex/issue-314-deepseek-v4-models

asdek · web-flow · commit e6741137451c · 2026-05-28T00:09:25.000+04:00
fix(deepseek): update default model names to DeepSeek V4
diff --git a/README.md b/README.md
@@ -1968,23 +1968,28 @@ DEEPSEEK_SERVER_URL=https://api.deepseek.com
 # With LiteLLM proxy
 DEEPSEEK_API_KEY=your_litellm_key
 DEEPSEEK_SERVER_URL=http://litellm-proxy:4000
-DEEPSEEK_PROVIDER=deepseek  # Adds prefix to model names (deepseek/deepseek-chat) for LiteLLM
+DEEPSEEK_PROVIDER=deepseek  # Adds prefix to model names (deepseek/deepseek-v4-flash) for LiteLLM
 ```
 
 #### Supported Models
 
-PentAGI supports 2 DeepSeek-V3.2 models with tool calling, streaming, thinking modes, and context caching. Both models are used in default configuration.
+PentAGI supports 2 DeepSeek V4 models with tool calling, streaming, thinking modes, and context caching. Models marked with `*` are used in the default configuration.
 
-| Model ID              | Thinking | Context | Max Output | Price (Input/Output/Cache) | Use Case                                        |
-| --------------------- | -------- | ------- | ---------- | -------------------------- | ----------------------------------------------- |
-| `deepseek-chat`*      | ❌        | 128K    | 8K         | $0.28/$0.42/$0.03          | General dialogue, code generation, tool calling |
-| `deepseek-reasoner`*  | ✅        | 128K    | 64K        | $0.28/$0.42/$0.03          | Advanced reasoning, complex logic, security analysis |
+| Model ID              | Thinking | Context | Price (Input/Output/Cache) | Use Case                                             |
+| --------------------- | -------- | ------- | -------------------------- | ---------------------------------------------------- |
+| `deepseek-v4-flash`*  | ❌        | 1M      | $0.14/$0.28/$0.0028        | General dialogue, code generation, tool calling      |
+| `deepseek-v4-pro`*    | ✅        | 1M      | $0.435/$0.87/$0.003625     | Advanced reasoning, complex logic, security analysis |
 
-**Prices**: Per 1M tokens. Cache pricing is for prompt caching (10% of input cost). Models with thinking support include reinforcement learning chain-of-thought reasoning.
+**Prices**: Per 1M tokens. Cache pricing applies to prompt tokens served from cache and is heavily discounted relative to the input price. Models with thinking support use chain-of-thought reasoning trained with reinforcement learning.
+
+> The legacy model names `deepseek-chat` and `deepseek-reasoner` are scheduled
+> for deprecation by DeepSeek on 2026-07-24. Existing user configurations
+> referencing the legacy names continue to work until then; the defaults above
+> use the current V4 names.
 
 **Key Features**:
-- **Automatic Prompt Caching**: 40-60% cost reduction on repeated context (10% of input price)
-- **Extended Thinking**: Reinforcement learning CoT for complex security analysis (deepseek-reasoner)
+- **Automatic Prompt Caching**: Significant cost reduction on repeated context, because cache hits are billed far below the input price
+- **Extended Thinking**: Reinforcement learning CoT for complex security analysis (deepseek-v4-pro)
 - **Strong Coding**: Optimized for code generation and exploit development
 - **Tool Calling**: Seamless integration with 20+ pentesting tools via function calling
 - **Streaming**: Real-time response streaming for interactive workflows
@@ -2967,7 +2972,7 @@ With `LLM_SERVER_PROVIDER=moonshot`, the system automatically prefixes all model
 
 When using LiteLLM proxy, set the corresponding `*_PROVIDER` variable to enable model prefixing:
 
-- `deepseek` - for DeepSeek models (`DEEPSEEK_PROVIDER=deepseek` → `deepseek/deepseek-chat`)
+- `deepseek` - for DeepSeek models (`DEEPSEEK_PROVIDER=deepseek` → `deepseek/deepseek-v4-flash`)
 - `zai` - for GLM models (`GLM_PROVIDER=zai` → `zai/glm-4`)
 - `moonshot` - for Kimi models (`KIMI_PROVIDER=moonshot` → `moonshot/kimi-k2.5`)
 - `dashscope` - for Qwen models (`QWEN_PROVIDER=dashscope` → `dashscope/qwen-plus`)
@@ -2982,7 +2987,7 @@ When using LiteLLM proxy, set the corresponding `*_PROVIDER` variable to enable
 # Use DeepSeek models via LiteLLM proxy with model prefixing
 DEEPSEEK_API_KEY=your_litellm_proxy_key
 DEEPSEEK_SERVER_URL=http://litellm-proxy:4000
-DEEPSEEK_PROVIDER=deepseek  # Models become deepseek/deepseek-chat, deepseek/deepseek-reasoner for LiteLLM
+DEEPSEEK_PROVIDER=deepseek  # Models become deepseek/deepseek-v4-flash, deepseek/deepseek-v4-pro for LiteLLM
 
 # Direct DeepSeek API usage (no prefix needed)
 DEEPSEEK_API_KEY=your_deepseek_api_key
diff --git a/backend/cmd/installer/wizard/locale/locale.go b/backend/cmd/installer/wizard/locale/locale.go
@@ -495,8 +495,8 @@ Setup options: Local installation from https://10.10.10.10:11434 or cloud regist
 	LLMFormDeepSeekHelp = `DeepSeek provides advanced AI models with strong reasoning capabilities and multilingual support.
 
 Default PentAGI Models:
-• DeepSeek-Chat: Flagship model for general-purpose tasks with strong coding and reasoning capabilities
-• DeepSeek-Reasoner: Advanced reasoning model for complex security analysis
+• deepseek-v4-flash: Cost-efficient general-purpose model for dialogue, code generation, and tool calling
+• deepseek-v4-pro: Higher-tier reasoning model for complex logic, mathematical reasoning, and security analysis
 • Cost-effective pricing with competitive performance compared to leading models
 
 Key Advantages:
@@ -507,7 +507,7 @@ Key Advantages:
 
 LiteLLM Integration:
 • Set Provider Name to 'deepseek' when using LiteLLM proxy
-• Enables model prefix (e.g., deepseek/deepseek-chat) without modifying config.yml
+• Enables model prefix (e.g., deepseek/deepseek-v4-flash) without modifying config.yml
 • Optional for direct DeepSeek API usage
 
 Best for: Teams requiring multilingual support, cost-conscious deployments, Chinese language security testing
diff --git a/backend/docs/config.md b/backend/docs/config.md
@@ -610,7 +610,7 @@ These settings control the integration with various Large Language Model (LLM) p
 | DeepSeekServerURL | `DEEPSEEK_SERVER_URL` | `https://api.deepseek.com` | DeepSeek API endpoint URL                                |
 | DeepSeekProvider  | `DEEPSEEK_PROVIDER`   | *(none)*                   | Provider name prefix for LiteLLM integration (optional)  |
 
-**LiteLLM Integration**: Set `DEEPSEEK_PROVIDER=deepseek` to enable model prefixing (e.g., `deepseek/deepseek-chat`) when using LiteLLM proxy with default PentAGI configs.
+**LiteLLM Integration**: Set `DEEPSEEK_PROVIDER=deepseek` to enable model prefixing (e.g., `deepseek/deepseek-v4-flash`) when using LiteLLM proxy with default PentAGI configs.
 
 ### GLM LLM Provider
 
diff --git a/backend/docs/llms_how_to.md b/backend/docs/llms_how_to.md
@@ -1196,7 +1196,7 @@ llm, _ := openai.New(
 )
 
 resp, _ := llm.GenerateContent(ctx, messages,
-    llms.WithModel("deepseek-reasoner"),
+    llms.WithModel("deepseek-v4-pro"),
 )
 
 // Reasoning extracted from <think>...</think> tags automatically
diff --git a/backend/pkg/providers/deepseek/config.yml b/backend/pkg/providers/deepseek/config.yml
@@ -1,127 +1,142 @@
 simple:
-  model: deepseek-chat
+  model: deepseek-v4-flash
   temperature: 0.5
   top_p: 0.5
   n: 1
   max_tokens: 8192
+  extra_body:
+    thinking:
+      type: disabled
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
 simple_json:
-  model: deepseek-chat
+  model: deepseek-v4-flash
   temperature: 0.5
   top_p: 0.5
   n: 1
   max_tokens: 4096
   json: true
+  extra_body:
+    thinking:
+      type: disabled
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
 primary_agent:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 16384
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 assistant:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 16384
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 generator:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 32768
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 refiner:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 20480
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 adviser:
-  model: deepseek-chat
+  model: deepseek-v4-flash
   temperature: 0.7
   top_p: 0.8
   n: 1
   max_tokens: 8192
+  extra_body:
+    thinking:
+      type: disabled
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
 reflector:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 4096
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 searcher:
-  model: deepseek-chat
+  model: deepseek-v4-flash
   temperature: 0.7
   top_p: 0.8
   n: 1
   max_tokens: 4096
+  extra_body:
+    thinking:
+      type: disabled
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
 enricher:
-  model: deepseek-chat
+  model: deepseek-v4-flash
   temperature: 0.7
   top_p: 0.8
   n: 1
   max_tokens: 4096
+  extra_body:
+    thinking:
+      type: disabled
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
 coder:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 20480
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 installer:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 16384
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
 
 pentester:
-  model: deepseek-reasoner
+  model: deepseek-v4-pro
   n: 1
   max_tokens: 16384
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
diff --git a/backend/pkg/providers/deepseek/deepseek.go b/backend/pkg/providers/deepseek/deepseek.go
@@ -19,7 +19,7 @@ import (
 //go:embed config.yml models.yml
 var configFS embed.FS
 
-const DeepSeekAgentModel = "deepseek-chat"
+const DeepSeekAgentModel = "deepseek-v4-flash"
 
 const DeepSeekToolCallIDTemplate = "call_{r:2:d}_{r:24:b}"
 
diff --git a/backend/pkg/providers/deepseek/models.yml b/backend/pkg/providers/deepseek/models.yml
@@ -1,15 +1,15 @@
-- name: deepseek-chat
-  description: DeepSeek-V3.2 (Non-thinking Mode) - Suitable for general dialogue, code generation, and tool calling tasks. Supports JSON Output, Tool Calls, Chat Prefix Completion, and FIM Completion. 128K context, max output 8K
+- name: deepseek-v4-flash
+  description: DeepSeek V4 Flash - Cost-efficient general-purpose model suitable for dialogue, code generation, and tool calling. Supports JSON output and tool calls. 1M context, up to 384K output tokens.
   thinking: false
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.14
+    output: 0.28
+    cache_read: 0.0028
 
-- name: deepseek-reasoner
-  description: DeepSeek-V3.2 (Thinking Mode) - Advanced reasoning model with reinforcement learning chain-of-thought capabilities, suitable for complex logic, mathematical reasoning, and security analysis tasks. 128K context, max output 64K
+- name: deepseek-v4-pro
+  description: DeepSeek V4 Pro - Higher-tier reasoning model suitable for complex logic, mathematical reasoning, and security analysis. 1M context, up to 384K output tokens.
   thinking: true
   price:
-    input: 0.28
-    output: 0.42
-    cache_read: 0.028
+    input: 0.435
+    output: 0.87
+    cache_read: 0.003625
diff --git a/examples/configs/deepseek.provider.yml b/examples/configs/deepseek.provider.yml

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1196,7 +1196,7 @@ llm, _ := openai.New(` |
| `1196` | `1196` | `)` |
| `1197` | `1197` | |
| `1198` | `1198` | `resp, _ := llm.GenerateContent(ctx, messages,` |
| `1199` | | `- llms.WithModel("deepseek-reasoner"),` |
| | `1199` | `+ llms.WithModel("deepseek-v4-pro"),` |
| `1200` | `1200` | `)` |
| `1201` | `1201` | |
| `1202` | `1202` | `// Reasoning extracted from <think>...</think> tags automatically` |