From 9cd6e96d393988b0e4ffd784dec9e1518f28813d Mon Sep 17 00:00:00 2001 From: Claudio Canales Date: Sun, 4 Jan 2026 15:19:43 -0300 Subject: [PATCH 1/3] feat: add configurable request timeout for extended thinking models --- config.example.yaml | 3 +++ internal/config/config.go | 3 +++ internal/runtime/executor/antigravity_executor.go | 10 +++++----- internal/runtime/executor/claude_executor.go | 6 +++--- internal/runtime/executor/codex_executor.go | 4 ++-- internal/runtime/executor/gemini_cli_executor.go | 8 ++++---- internal/runtime/executor/gemini_executor.go | 6 +++--- .../runtime/executor/gemini_vertex_executor.go | 14 +++++++------- internal/runtime/executor/iflow_executor.go | 4 ++-- .../runtime/executor/openai_compat_executor.go | 4 ++-- internal/runtime/executor/proxy_helpers.go | 9 +++++++++ internal/runtime/executor/qwen_executor.go | 4 ++-- 12 files changed, 45 insertions(+), 30 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 332fba705..e8703ec0c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -65,6 +65,9 @@ request-retry: 3 # Maximum wait time in seconds for a cooled-down credential before triggering a retry. max-retry-interval: 30 +# Maximum duration in seconds for upstream provider requests. Set to 0 to use the default (10 minutes). +request-timeout: 600 + # Quota exceeded behavior quota-exceeded: switch-project: true # Whether to automatically switch to another project when a quota is exceeded diff --git a/internal/config/config.go b/internal/config/config.go index e8ae3554f..13cf78624 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -59,6 +59,9 @@ type Config struct { RequestRetry int `yaml:"request-retry" json:"request-retry"` // MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential. 
MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"` + // RequestTimeout defines the maximum request duration in seconds for upstream API calls. + // When set to 0, executors apply a sensible default. + RequestTimeout int `yaml:"request-timeout" json:"request-timeout"` // QuotaExceeded defines the behavior when a quota is exceeded. QuotaExceeded QuotaExceeded `yaml:"quota-exceeded" json:"quota-exceeded"` diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 0baba498d..1a1a444e3 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -109,7 +109,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) var lastStatus int var lastBody []byte @@ -209,7 +209,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) var lastStatus int var lastBody []byte @@ -550,7 +550,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 
resolveRequestTimeout(e.cfg)) var lastStatus int var lastBody []byte @@ -698,7 +698,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut isClaude := strings.Contains(strings.ToLower(req.Model), "claude") baseURLs := antigravityBaseURLFallbackOrder(auth) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) var authID, authLabel, authType, authValue string if auth != nil { @@ -819,7 +819,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c } baseURLs := antigravityBaseURLFallbackOrder(auth) - httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, resolveRequestTimeout(cfg)) for idx, baseURL := range baseURLs { modelsURL := baseURL + antigravityModelsPath diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 7be4f41bd..73f7a5e9e 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -106,7 +106,7 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -218,7 +218,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -351,7 +351,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - 
httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) resp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 0788e4f13..307c46c68 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -95,7 +95,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re AuthType: authType, AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -201,7 +201,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index e4bb7340c..4603ba790 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -104,7 +104,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth models = append([]string{req.Model}, models...) 
} - httpClient := newHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) respCtx := context.WithValue(ctx, "alt", opts.Alt) var authID, authLabel, authType, authValue string @@ -242,7 +242,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut models = append([]string{req.Model}, models...) } - httpClient := newHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) respCtx := context.WithValue(ctx, "alt", opts.Alt) var authID, authLabel, authType, authValue string @@ -414,7 +414,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. models = append([]string{req.Model}, models...) } - httpClient := newHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) respCtx := context.WithValue(ctx, "alt", opts.Alt) var authID, authLabel, authType, authValue string @@ -553,7 +553,7 @@ func prepareGeminiCLITokenSource(ctx context.Context, cfg *config.Config, auth * } ctxToken := ctx - if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { + if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, resolveRequestTimeout(cfg)); httpClient != nil { ctxToken = context.WithValue(ctxToken, oauth2.HTTPClient, httpClient) } diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 192f42e25..2cda24e63 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -142,7 +142,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -241,7 
+241,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -354,7 +354,7 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) resp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index bcf4473cf..8feaa07ad 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -186,7 +186,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -295,7 +295,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -394,7 +394,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 
resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -519,7 +519,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -626,7 +626,7 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -718,7 +718,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, errDo := httpClient.Do(httpReq) if errDo != nil { recordAPIResponseError(ctx, e.cfg, errDo) @@ -817,7 +817,7 @@ func vertexBaseURL(location string) string { } func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyauth.Auth, saJSON []byte) (string, error) { - if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, 0); httpClient != nil { + if httpClient := newProxyAwareHTTPClient(ctx, cfg, auth, resolveRequestTimeout(cfg)); httpClient != nil { ctx = context.WithValue(ctx, oauth2.HTTPClient, httpClient) } // Use cloud-platform scope for Vertex AI. 
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index e1b0394e9..746fdb523 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -97,7 +97,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -197,7 +197,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 60c80f9d7..3c6f03ff5 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -104,7 +104,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -203,7 +203,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) diff --git 
a/internal/runtime/executor/proxy_helpers.go b/internal/runtime/executor/proxy_helpers.go index ab0f626ac..50465aa94 100644 --- a/internal/runtime/executor/proxy_helpers.go +++ b/internal/runtime/executor/proxy_helpers.go @@ -14,6 +14,15 @@ import ( "golang.org/x/net/proxy" ) +const defaultRequestTimeout = 10 * time.Minute + +func resolveRequestTimeout(cfg *config.Config) time.Duration { + if cfg == nil || cfg.RequestTimeout <= 0 { + return defaultRequestTimeout + } + return time.Duration(cfg.RequestTimeout) * time.Second +} + // newProxyAwareHTTPClient creates an HTTP client with proper proxy configuration priority: // 1. Use auth.ProxyURL if configured (highest priority) // 2. Use cfg.ProxyURL if auth proxy is not configured diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index be6c10244..5ea085c46 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -87,7 +87,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) @@ -176,7 +176,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut AuthValue: authValue, }) - httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) + httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, resolveRequestTimeout(e.cfg)) httpResp, err := httpClient.Do(httpReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) From 90c358c96a7af4b615b81f33363874bb2412038c Mon Sep 17 00:00:00 2001 From: Claudio Canales Date: Sun, 4 Jan 2026 15:42:20 -0300 Subject: [PATCH 2/3] Update internal/config/config.go Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- 
internal/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/config/config.go b/internal/config/config.go index 13cf78624..40c9df29b 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -60,7 +60,7 @@ type Config struct { // MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential. MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"` // RequestTimeout defines the maximum request duration in seconds for upstream API calls. - // When set to 0, executors apply a sensible default. + // When set to 0, a negative value, or not specified, a default of 10 minutes is used. RequestTimeout int `yaml:"request-timeout" json:"request-timeout"` // QuotaExceeded defines the behavior when a quota is exceeded. From ff5244a726120fa8f675306e3f603b26cf03e73e Mon Sep 17 00:00:00 2001 From: Claudio Canales Date: Sun, 4 Jan 2026 16:00:44 -0300 Subject: [PATCH 3/3] Update config.example.yaml request-timeout comment --- config.example.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.example.yaml b/config.example.yaml index e8703ec0c..2a0d0eec9 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -65,7 +65,7 @@ request-retry: 3 # Maximum wait time in seconds for a cooled-down credential before triggering a retry. max-retry-interval: 30 -# Maximum duration in seconds for upstream provider requests. Set to 0 to use the default (10 minutes). +# Maximum duration in seconds for upstream provider requests. Defaults to 600s (10 minutes) if omitted, set to 0, or negative. request-timeout: 600 # Quota exceeded behavior