
Commit e592a57
Merge branch 'router-for-me:main' into main
2 parents: a862984 + 66769ec

17 files changed: +884 / -245 lines

config.example.yaml

Lines changed: 5 additions & 0 deletions
@@ -78,6 +78,11 @@ routing:
 # When true, enable authentication for the WebSocket API (/v1/ws).
 ws-auth: false
 
+# Streaming behavior (SSE keep-alives + safe bootstrap retries).
+# streaming:
+#   keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives.
+#   bootstrap-retries: 1 # Default: 0 (disabled). Retries before first byte is sent.
+
 # Gemini API keys
 # gemini-api-key:
 #   - api-key: "AIzaSy...01"

internal/config/sdk_config.go

Lines changed: 15 additions & 0 deletions
@@ -22,6 +22,21 @@ type SDKConfig struct {
 
 	// Access holds request authentication provider configuration.
 	Access AccessConfig `yaml:"auth,omitempty" json:"auth,omitempty"`
+
+	// Streaming configures server-side streaming behavior (keep-alives and safe bootstrap retries).
+	Streaming StreamingConfig `yaml:"streaming" json:"streaming"`
+}
+
+// StreamingConfig holds server streaming behavior configuration.
+type StreamingConfig struct {
+	// KeepAliveSeconds controls how often the server emits SSE heartbeats (": keep-alive\n\n").
+	// nil means default (15 seconds). <= 0 disables keep-alives.
+	KeepAliveSeconds *int `yaml:"keepalive-seconds,omitempty" json:"keepalive-seconds,omitempty"`
+
+	// BootstrapRetries controls how many times the server may retry a streaming request before any bytes are sent,
+	// to allow auth rotation / transient recovery.
+	// nil means default (2). 0 disables bootstrap retries.
+	BootstrapRetries *int `yaml:"bootstrap-retries,omitempty" json:"bootstrap-retries,omitempty"`
 }
 
 // AccessConfig groups request authentication providers.
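As a rough illustration of the nil-versus-zero semantics documented above (and mirrored by the commented-out streaming: block in config.example.yaml), a caller might resolve the effective values as follows. These helper names are hypothetical, not part of this commit; the defaults come from the struct's doc comments:

// Hypothetical resolution helpers; defaults follow the doc comments above.
func (s StreamingConfig) EffectiveKeepAliveSeconds() int {
	if s.KeepAliveSeconds == nil {
		return 15 // unset in YAML: fall back to the documented default
	}
	if *s.KeepAliveSeconds <= 0 {
		return 0 // explicitly disabled
	}
	return *s.KeepAliveSeconds
}

func (s StreamingConfig) EffectiveBootstrapRetries() int {
	if s.BootstrapRetries == nil {
		return 2 // unset in YAML: fall back to the documented default
	}
	if *s.BootstrapRetries <= 0 {
		return 0 // explicitly disabled
	}
	return *s.BootstrapRetries
}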

internal/runtime/executor/antigravity_executor.go

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ const (
 	antigravityModelsPath   = "/v1internal:fetchAvailableModels"
 	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret = "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"
-	defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
+	defaultAntigravityAgent = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType     = "antigravity"
 	refreshSkew             = 3000 * time.Second
 )

internal/translator/antigravity/claude/antigravity_claude_response.go

Lines changed: 19 additions & 0 deletions
@@ -35,6 +35,7 @@ type Params struct {
 	CandidatesTokenCount int64 // Cached candidate token count from usage metadata
 	ThoughtsTokenCount   int64 // Cached thinking token count from usage metadata
 	TotalTokenCount      int64 // Cached total token count from usage metadata
+	CachedTokenCount     int64 // Cached content token count (indicates prompt caching)
 	HasSentFinalEvents   bool  // Indicates if final content/message events have been sent
 	HasToolUse           bool  // Indicates if tool use was observed in the stream
 	HasContent           bool  // Tracks whether any content (text, thinking, or tool use) has been output

@@ -274,6 +275,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 	params.CandidatesTokenCount = usageResult.Get("candidatesTokenCount").Int()
 	params.ThoughtsTokenCount = usageResult.Get("thoughtsTokenCount").Int()
 	params.TotalTokenCount = usageResult.Get("totalTokenCount").Int()
+	params.CachedTokenCount = usageResult.Get("cachedContentTokenCount").Int()
 	if params.CandidatesTokenCount == 0 && params.TotalTokenCount > 0 {
 		params.CandidatesTokenCount = params.TotalTokenCount - params.PromptTokenCount - params.ThoughtsTokenCount
 		if params.CandidatesTokenCount < 0 {

@@ -322,6 +324,14 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 	*output = *output + "event: message_delta\n"
 	*output = *output + "data: "
 	delta := fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":%d,"output_tokens":%d}}`, stopReason, params.PromptTokenCount, usageOutputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if params.CachedTokenCount > 0 {
+		var err error
+		delta, err = sjson.Set(delta, "usage.cache_read_input_tokens", params.CachedTokenCount)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 	*output = *output + delta + "\n\n\n"
 
 	params.HasSentFinalEvents = true

@@ -361,6 +371,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	candidateTokens := root.Get("response.usageMetadata.candidatesTokenCount").Int()
 	thoughtTokens := root.Get("response.usageMetadata.thoughtsTokenCount").Int()
 	totalTokens := root.Get("response.usageMetadata.totalTokenCount").Int()
+	cachedTokens := root.Get("response.usageMetadata.cachedContentTokenCount").Int()
 	outputTokens := candidateTokens + thoughtTokens
 	if outputTokens == 0 && totalTokens > 0 {
 		outputTokens = totalTokens - promptTokens

@@ -374,6 +385,14 @@
 	responseJSON, _ = sjson.Set(responseJSON, "model", root.Get("response.modelVersion").String())
 	responseJSON, _ = sjson.Set(responseJSON, "usage.input_tokens", promptTokens)
 	responseJSON, _ = sjson.Set(responseJSON, "usage.output_tokens", outputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if cachedTokens > 0 {
+		var err error
+		responseJSON, err = sjson.Set(responseJSON, "usage.cache_read_input_tokens", cachedTokens)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 
 	contentArrayInitialized := false
 	ensureContentArray := func() {
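The sjson pattern above is easy to exercise in isolation. In this sketch the token counts are invented, and the printed delta shows cache_read_input_tokens appearing inside usage the same way the streaming path emits it:

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// A message_delta payload shaped like the one appendFinalEvents builds (example counts).
	delta := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":1200,"output_tokens":85}}`
	cached := int64(1024) // stands in for usageMetadata.cachedContentTokenCount
	if cached > 0 {
		if out, err := sjson.Set(delta, "usage.cache_read_input_tokens", cached); err == nil {
			delta = out
		}
	}
	fmt.Println(delta)
	// ...,"usage":{"input_tokens":1200,"output_tokens":85,"cache_read_input_tokens":1024}}
}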

internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go

Lines changed: 11 additions & 0 deletions
@@ -13,6 +13,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	log "github.com/sirupsen/logrus"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"

@@ -93,10 +95,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 	}
 	promptTokenCount := usageResult.Get("promptTokenCount").Int()
 	thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+	cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 	template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 	if thoughtsTokenCount > 0 {
 		template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 	}
+	// Include cached token count if present (indicates prompt caching is working)
+	if cachedTokenCount > 0 {
+		var err error
+		template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		if err != nil {
+			log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+		}
+	}
 }
 
 // Process the main content part of the response.
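Taken together, the translator now maps a Gemini-style usageMetadata block onto the OpenAI usage object roughly as follows. The counts are illustrative; prompt_tokens is promptTokenCount + thoughtsTokenCount per the code above, and cached_tokens / reasoning_tokens appear only when their source counts are positive:

usageMetadata in: {"promptTokenCount": 900, "thoughtsTokenCount": 40, "cachedContentTokenCount": 512}
usage out: {"prompt_tokens": 940, "completion_tokens_details": {"reasoning_tokens": 40}, "prompt_tokens_details": {"cached_tokens": 512}}

The same mapping is mirrored in the Gemini OpenAI translator further down.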

internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go

Lines changed: 1 addition & 1 deletion
@@ -244,7 +244,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	out, _ = sjson.SetRawBytes(out, "request.contents.-1", node)
 
 	// Append a single tool content combining name + response per function
-	toolNode := []byte(`{"role":"tool","parts":[]}`)
+	toolNode := []byte(`{"role":"user","parts":[]}`)
 	pp := 0
 	for _, fid := range fIDs {
 		if name, ok := tcID2Name[fid]; ok {

internal/translator/gemini/openai/chat-completions/gemini_openai_request.go

Lines changed: 1 addition & 1 deletion
@@ -286,7 +286,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	out, _ = sjson.SetRawBytes(out, "contents.-1", node)
 
 	// Append a single tool content combining name + response per function
-	toolNode := []byte(`{"role":"tool","parts":[]}`)
+	toolNode := []byte(`{"role":"user","parts":[]}`)
 	pp := 0
 	for _, fid := range fIDs {
 		if name, ok := tcID2Name[fid]; ok {
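The same one-line fix lands in both translators because Gemini's generateContent API documents only "user" and "model" as valid content roles, so a "role":"tool" turn risks being rejected upstream; after this change the combined function results travel in a user turn whose parts carry functionResponse objects. Illustratively, with a hypothetical function name and payload, the appended node ends up shaped like:

{"role":"user","parts":[{"functionResponse":{"name":"get_weather","response":{"result":"sunny"}}}]}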

internal/translator/gemini/openai/chat-completions/gemini_openai_response.go

Lines changed: 19 additions & 0 deletions
@@ -13,6 +13,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )

@@ -96,10 +97,19 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 	}
 	promptTokenCount := usageResult.Get("promptTokenCount").Int()
 	thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+	cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 	template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 	if thoughtsTokenCount > 0 {
 		template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 	}
+	// Include cached token count if present (indicates prompt caching is working)
+	if cachedTokenCount > 0 {
+		var err error
+		template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		if err != nil {
+			log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
+		}
+	}
 }
 
 // Process the main content part of the response.

@@ -240,10 +250,19 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 	}
 	promptTokenCount := usageResult.Get("promptTokenCount").Int()
 	thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+	cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 	template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 	if thoughtsTokenCount > 0 {
 		template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 	}
+	// Include cached token count if present (indicates prompt caching is working)
+	if cachedTokenCount > 0 {
+		var err error
+		template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+		if err != nil {
+			log.Warnf("gemini openai response: failed to set cached_tokens in non-streaming: %v", err)
+		}
+	}
 }
 
 // Process the main content part of the response.

sdk/api/handlers/claude/code_handlers.go

Lines changed: 57 additions & 42 deletions
@@ -14,7 +14,6 @@ import (
 	"fmt"
 	"io"
 	"net/http"
-	"time"
 
 	"github.com/gin-gonic/gin"
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/constant"

@@ -185,14 +184,6 @@ func (h *ClaudeCodeAPIHandler) handleNonStreamingResponse(c *gin.Context, rawJSO
 // - c: The Gin context for the request.
 // - rawJSON: The raw JSON request body.
 func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON []byte) {
-	// Set up Server-Sent Events (SSE) headers for streaming response
-	// These headers are essential for maintaining a persistent connection
-	// and enabling real-time streaming of chat completions
-	c.Header("Content-Type", "text/event-stream")
-	c.Header("Cache-Control", "no-cache")
-	c.Header("Connection", "keep-alive")
-	c.Header("Access-Control-Allow-Origin", "*")
-
 	// Get the http.Flusher interface to manually flush the response.
 	// This is crucial for streaming as it allows immediate sending of data chunks
 	flusher, ok := c.Writer.(http.Flusher)

@@ -213,58 +204,82 @@ func (h *ClaudeCodeAPIHandler) handleStreamingResponse(c *gin.Context, rawJSON [
 	cliCtx, cliCancel := h.GetContextWithCancel(h, c, context.Background())
 
 	dataChan, errChan := h.ExecuteStreamWithAuthManager(cliCtx, h.HandlerType(), modelName, rawJSON, "")
-	h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
-	return
-}
+	setSSEHeaders := func() {
+		c.Header("Content-Type", "text/event-stream")
+		c.Header("Cache-Control", "no-cache")
+		c.Header("Connection", "keep-alive")
+		c.Header("Access-Control-Allow-Origin", "*")
+	}
 
-func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
-	// OpenAI-style stream forwarding: write each SSE chunk and flush immediately.
-	// This guarantees clients see incremental output even for small responses.
+	// Peek at the first chunk to determine success or failure before setting headers
 	for {
 		select {
 		case <-c.Request.Context().Done():
-			cancel(c.Request.Context().Err())
+			cliCancel(c.Request.Context().Err())
 			return
-
-		case chunk, ok := <-data:
+		case errMsg, ok := <-errChan:
+			if !ok {
+				// Err channel closed cleanly; wait for data channel.
+				errChan = nil
+				continue
+			}
+			// Upstream failed immediately. Return proper error status and JSON.
+			h.WriteErrorResponse(c, errMsg)
+			if errMsg != nil {
+				cliCancel(errMsg.Error)
+			} else {
+				cliCancel(nil)
+			}
+			return
+		case chunk, ok := <-dataChan:
 			if !ok {
+				// Stream closed without data? Send DONE or just headers.
+				setSSEHeaders()
 				flusher.Flush()
-				cancel(nil)
+				cliCancel(nil)
 				return
 			}
+
+			// Success! Set headers now.
+			setSSEHeaders()
+
+			// Write the first chunk
 			if len(chunk) > 0 {
 				_, _ = c.Writer.Write(chunk)
 				flusher.Flush()
 			}
 
-		case errMsg, ok := <-errs:
-			if !ok {
-				continue
-			}
-			if errMsg != nil {
-				status := http.StatusInternalServerError
-				if errMsg.StatusCode > 0 {
-					status = errMsg.StatusCode
-				}
-				c.Status(status)
-
-				// An error occurred: emit as a proper SSE error event
-				errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
-				_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
-				flusher.Flush()
-			}
-
-			var execErr error
-			if errMsg != nil {
-				execErr = errMsg.Error
-			}
-			cancel(execErr)
+			// Continue streaming the rest
+			h.forwardClaudeStream(c, flusher, func(err error) { cliCancel(err) }, dataChan, errChan)
 			return
-		case <-time.After(500 * time.Millisecond):
 		}
 	}
 }
 
+func (h *ClaudeCodeAPIHandler) forwardClaudeStream(c *gin.Context, flusher http.Flusher, cancel func(error), data <-chan []byte, errs <-chan *interfaces.ErrorMessage) {
+	h.ForwardStream(c, flusher, cancel, data, errs, handlers.StreamForwardOptions{
+		WriteChunk: func(chunk []byte) {
+			if len(chunk) == 0 {
+				return
+			}
+			_, _ = c.Writer.Write(chunk)
+		},
+		WriteTerminalError: func(errMsg *interfaces.ErrorMessage) {
+			if errMsg == nil {
+				return
+			}
+			status := http.StatusInternalServerError
+			if errMsg.StatusCode > 0 {
+				status = errMsg.StatusCode
+			}
+			c.Status(status)
+
+			errorBytes, _ := json.Marshal(h.toClaudeError(errMsg))
+			_, _ = fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errorBytes)
+		},
+	})
+}
+
 type claudeErrorDetail struct {
 	Type    string `json:"type"`
 	Message string `json:"message"`