Commit 30700de

feat(embeddings): make OpenAI-compatible batch size configurable (#1142) (#1143)
The hardcoded batch_size=100 in OpenAIEmbeddings is incompatible with some OpenAI-compatible providers that enforce smaller per-request limits (e.g. DashScope / Aliyun Tongyi caps at 10). Without an override, retain paths that extract more than 10 facts fail with 400 errors.

Expose HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE (default 100) and propagate it to both the 'openai' and 'openrouter' providers, which share the same OpenAIEmbeddings client. Zero, negative, or non-integer values are rejected at config load time (_parse_positive_int) so misconfiguration fails fast instead of triggering infinite loops or zero-step range() calls. The new HindsightConfig field has a dataclass default, so existing direct constructor callers (tests, external integrations) keep working.

Fixes #1142.
1 parent a63253f commit 30700de

5 files changed

Lines changed: 189 additions & 1 deletion

hindsight-api-slim/hindsight_api/config.py

Lines changed: 30 additions & 0 deletions
@@ -177,6 +177,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
 ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
+ENV_EMBEDDINGS_OPENAI_BATCH_SIZE = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"

 # Gemini/Vertex AI embeddings configuration
 ENV_EMBEDDINGS_GEMINI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_GEMINI_API_KEY"

@@ -468,6 +469,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
 DEFAULT_EMBEDDINGS_LOCAL_TRUST_REMOTE_CODE = False  # Security: disabled by default, required for some models
 DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
+DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE = 100
 DEFAULT_EMBEDDINGS_GEMINI_MODEL = "gemini-embedding-001"
 DEFAULT_EMBEDDINGS_GEMINI_OUTPUT_DIMENSIONALITY = 768
 DEFAULT_EMBEDDING_DIMENSION = 384

@@ -725,6 +727,25 @@ def _parse_str_list(value: str) -> list[str]:
     return [v.strip() for v in value.split(",") if v.strip()]


+def _parse_positive_int(name: str, raw: str | None, default: int) -> int:
+    """
+    Parse an env var that must be a positive integer (>= 1).
+
+    Falls back to ``default`` when unset/empty. Raises ValueError on non-integer
+    or non-positive values so misconfiguration fails fast instead of triggering
+    infinite loops or zero-step range() calls downstream.
+    """
+    if raw is None or raw == "":
+        return default
+    try:
+        parsed = int(raw)
+    except ValueError as e:
+        raise ValueError(f"{name} must be an integer, got {raw!r}") from e
+    if parsed < 1:
+        raise ValueError(f"{name} must be >= 1, got {parsed}")
+    return parsed
+
+
 def _validate_extraction_mode(mode: str) -> str:
     """Validate and normalize extraction mode."""
     mode_lower = mode.lower()

@@ -1068,6 +1089,10 @@ class HindsightConfig:
     webhook_event_types: list[str]  # Event types to deliver globally
     webhook_delivery_poll_interval_seconds: int  # How often the delivery worker polls

+    # Defaulted fields (source-compatible additions — existing direct constructor callers keep working).
+    # Keep at the end of the dataclass; Python forbids non-default fields after default fields.
+    embeddings_openai_batch_size: int = DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE
+
     # Class-level sets for configuration categorization

     # CREDENTIAL_FIELDS: Never exposed via API, never configurable per-tenant/bank

@@ -1376,6 +1401,11 @@ def from_env(cls) -> "HindsightConfig":
             in ("true", "1"),
             embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
             embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
+            embeddings_openai_batch_size=_parse_positive_int(
+                ENV_EMBEDDINGS_OPENAI_BATCH_SIZE,
+                os.getenv(ENV_EMBEDDINGS_OPENAI_BATCH_SIZE),
+                DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE,
+            ),
             # Cohere embeddings (with backward-compatible fallback to shared API key)
             embeddings_cohere_api_key=os.getenv(ENV_EMBEDDINGS_COHERE_API_KEY) or os.getenv(ENV_COHERE_API_KEY),
             embeddings_cohere_model=os.getenv(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL),
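For reference, a minimal sketch of how the new helper behaves at config load time, assuming `_parse_positive_int` is imported from `hindsight_api.config` as the module-level function added above:

```python
from hindsight_api.config import _parse_positive_int

# Unset or empty env var falls back to the default.
assert _parse_positive_int("HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE", None, 100) == 100

# A valid override is parsed into an int.
assert _parse_positive_int("HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE", "10", 100) == 10

# Zero, negative, or non-numeric values fail fast with a ValueError naming the env var.
try:
    _parse_positive_int("HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE", "0", 100)
except ValueError as exc:
    print(exc)  # HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE must be >= 1, got 0
```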

hindsight-api-slim/hindsight_api/engine/embeddings.py

Lines changed: 7 additions & 1 deletion
@@ -1100,7 +1100,12 @@ def create_embeddings_from_env() -> Embeddings:
             )
         model = os.environ.get(ENV_EMBEDDINGS_OPENAI_MODEL, DEFAULT_EMBEDDINGS_OPENAI_MODEL)
         base_url = os.environ.get(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None
-        return OpenAIEmbeddings(api_key=api_key, model=model, base_url=base_url)
+        return OpenAIEmbeddings(
+            api_key=api_key,
+            model=model,
+            base_url=base_url,
+            batch_size=config.embeddings_openai_batch_size,
+        )
     elif provider == "openrouter":
         api_key = config.embeddings_openrouter_api_key
         if not api_key:

@@ -1112,6 +1117,7 @@
             api_key=api_key,
             model=config.embeddings_openrouter_model,
             base_url="https://openrouter.ai/api/v1",
+            batch_size=config.embeddings_openai_batch_size,
         )
     elif provider == "cohere":
         api_key = config.embeddings_cohere_api_key
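The call sites above only thread `batch_size` into the `OpenAIEmbeddings` constructor; the splitting itself is not shown in this diff. Based on the commit message's mention of zero-step `range()` calls and the regression test below, the loop inside `encode()` presumably has roughly this shape (an assumed sketch, not the actual implementation):

```python
# Hypothetical shape of the batching loop inside OpenAIEmbeddings.encode().
def encode_in_batches(client, model: str, texts: list[str], batch_size: int) -> list[list[float]]:
    vectors: list[list[float]] = []
    # A batch_size of 0 would make this range() raise, which is why config load rejects it.
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        response = client.embeddings.create(model=model, input=batch)
        # Re-order by index in case the provider returns items out of order.
        vectors.extend(item.embedding for item in sorted(response.data, key=lambda d: d.index))
    return vectors
```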
Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
"""
Tests for HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE config wiring.

Regression test for issue #1142: `OpenAIEmbeddings` hardcoded `batch_size=100` is
incompatible with OpenAI-compatible providers that enforce stricter per-request
limits (e.g. DashScope / Aliyun Tongyi cap at 10). Users must be able to override
the batch size via env var so `encode()` splits into smaller chunks.
"""

import os

import pytest


@pytest.fixture(autouse=True)
def setup_test_env():
    """Save/restore env vars touched by these tests."""
    from hindsight_api.config import clear_config_cache

    env_vars_to_save = [
        "HINDSIGHT_API_EMBEDDINGS_PROVIDER",
        "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY",
        "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL",
        "HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE",
        "HINDSIGHT_API_EMBEDDINGS_OPENROUTER_API_KEY",
        "HINDSIGHT_API_LLM_API_KEY",
        "HINDSIGHT_API_LLM_PROVIDER",
    ]

    original_values = {key: os.environ.get(key) for key in env_vars_to_save}

    clear_config_cache()

    yield

    for key, original_value in original_values.items():
        if original_value is None:
            os.environ.pop(key, None)
        else:
            os.environ[key] = original_value

    clear_config_cache()


def test_default_openai_batch_size_is_100():
    """Default batch size is 100 when env var unset (preserves legacy behavior)."""
    from hindsight_api.config import HindsightConfig

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ.pop("HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE", None)

    config = HindsightConfig.from_env()
    assert config.embeddings_openai_batch_size == 100


def test_openai_batch_size_env_var_is_read():
    """HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE overrides the default."""
    from hindsight_api.config import HindsightConfig

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "10"

    config = HindsightConfig.from_env()
    assert config.embeddings_openai_batch_size == 10


def test_openai_embeddings_provider_uses_configured_batch_size():
    """create_embeddings_from_env() propagates config to OpenAIEmbeddings for 'openai' provider."""
    from hindsight_api.engine.embeddings import OpenAIEmbeddings, create_embeddings_from_env

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_PROVIDER"] = "openai"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"] = "sk-test"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "10"

    embeddings = create_embeddings_from_env()
    assert isinstance(embeddings, OpenAIEmbeddings)
    assert embeddings.batch_size == 10


def test_openrouter_provider_uses_configured_batch_size():
    """'openrouter' provider also honors the shared batch-size config (both paths use OpenAIEmbeddings)."""
    from hindsight_api.engine.embeddings import OpenAIEmbeddings, create_embeddings_from_env

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_PROVIDER"] = "openrouter"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENROUTER_API_KEY"] = "sk-or-test"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "8"

    embeddings = create_embeddings_from_env()
    assert isinstance(embeddings, OpenAIEmbeddings)
    assert embeddings.batch_size == 8


def test_zero_batch_size_is_rejected():
    """Zero would cause `range(0, N, 0)` to crash at runtime — fail fast at config load."""
    from hindsight_api.config import HindsightConfig

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "0"

    with pytest.raises(ValueError, match="must be >= 1"):
        HindsightConfig.from_env()


def test_negative_batch_size_is_rejected():
    """Negative values would silently skip batching — reject at config load."""
    from hindsight_api.config import HindsightConfig

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "-5"

    with pytest.raises(ValueError, match="must be >= 1"):
        HindsightConfig.from_env()


def test_non_numeric_batch_size_is_rejected():
    """Non-integer strings are rejected with a clear error pointing at the env var name."""
    from hindsight_api.config import HindsightConfig

    os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "mock"
    os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "not-a-number"

    with pytest.raises(ValueError, match="HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"):
        HindsightConfig.from_env()


def test_openai_encode_splits_on_configured_batch_size(monkeypatch):
    """encode() sends multiple upstream requests when len(texts) > batch_size."""
    from types import SimpleNamespace

    from hindsight_api.engine.embeddings import OpenAIEmbeddings

    emb = OpenAIEmbeddings(api_key="sk-test", model="text-embedding-3-small", batch_size=10)

    calls: list[int] = []

    def fake_create(*, model, input):
        calls.append(len(input))
        return SimpleNamespace(data=[SimpleNamespace(index=i, embedding=[0.0] * 1536) for i in range(len(input))])

    emb._client = SimpleNamespace(embeddings=SimpleNamespace(create=fake_create))
    emb._dimension = 1536

    vectors = emb.encode(["x"] * 25)

    assert len(vectors) == 25
    assert calls == [10, 10, 5], (
        f"Expected upstream calls of size 10, 10, 5 when batch_size=10 and 25 inputs, got {calls}"
    )

hindsight-docs/docs/developer/configuration.md

Lines changed: 1 addition & 0 deletions
@@ -401,6 +401,7 @@ export HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF=120.0 # Cap at 2min instead of 1m
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY` | OpenAI API key (falls back to `HINDSIGHT_API_LLM_API_KEY`) | - |
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL` | OpenAI embedding model | `text-embedding-3-small` |
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL` | Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI) | - |
+| `HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE` | Max inputs per `embeddings.create` call for `openai`/`openrouter` providers — lower this when the upstream endpoint enforces stricter limits (e.g. DashScope caps at 10) | `100` |
 | `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_API_KEY` | OpenRouter API key for embeddings (falls back to `HINDSIGHT_API_OPENROUTER_API_KEY`, then `HINDSIGHT_API_LLM_API_KEY`) | - |
 | `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_MODEL` | OpenRouter embedding model | `perplexity/pplx-embed-v1-0.6b` |
 | `HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY` | Cohere API key for embeddings | - |

skills/hindsight-docs/references/developer/configuration.md

Lines changed: 1 addition & 0 deletions
@@ -401,6 +401,7 @@ export HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF=120.0 # Cap at 2min instead of 1m
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY` | OpenAI API key (falls back to `HINDSIGHT_API_LLM_API_KEY`) | - |
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL` | OpenAI embedding model | `text-embedding-3-small` |
 | `HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL` | Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI) | - |
+| `HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE` | Max inputs per `embeddings.create` call for `openai`/`openrouter` providers — lower this when the upstream endpoint enforces stricter limits (e.g. DashScope caps at 10) | `100` |
 | `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_API_KEY` | OpenRouter API key for embeddings (falls back to `HINDSIGHT_API_OPENROUTER_API_KEY`, then `HINDSIGHT_API_LLM_API_KEY`) | - |
 | `HINDSIGHT_API_EMBEDDINGS_OPENROUTER_MODEL` | OpenRouter embedding model | `perplexity/pplx-embed-v1-0.6b` |
 | `HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY` | Cohere API key for embeddings | - |
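Taken together, a deployment pointing the `openai` embeddings provider at an OpenAI-compatible endpoint with a stricter per-request cap would presumably be configured along these lines (a sketch; the base URL and model name are illustrative, not taken from this commit):

```python
import os

# Illustrative values for an OpenAI-compatible provider that caps embedding batches at 10 inputs.
# (Other required settings, e.g. the LLM provider, are omitted for brevity.)
os.environ["HINDSIGHT_API_EMBEDDINGS_PROVIDER"] = "openai"
os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"] = "sk-..."
os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"] = "https://dashscope.aliyuncs.com/compatible-mode/v1"  # illustrative
os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"] = "text-embedding-v3"  # illustrative
os.environ["HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"] = "10"

from hindsight_api.engine.embeddings import create_embeddings_from_env

embeddings = create_embeddings_from_env()  # OpenAIEmbeddings configured with batch_size=10
```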
