perf(core): make postgres vector sync tuning configurable

phernandez · phernandez · commit 2f34747c9fa9 · 2026-04-07T12:26:31.000-05:00
Signed-off-by: phernandez <paul@basicmachines.co>
diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py
@@ -198,6 +198,12 @@ class BasicMemoryConfig(BaseSettings):
         description="Batch size for vector sync orchestration flushes.",
         gt=0,
     )
+    semantic_postgres_prepare_concurrency: int = Field(
+        default=4,
+        description="Number of Postgres entity prepare tasks to run concurrently during vector sync. Postgres only; keep this low to avoid overdriving the database connection pool.",
+        gt=0,
+        le=16,
+    )
     semantic_embedding_cache_dir: str | None = Field(
         default=None,
         description="Optional cache directory for FastEmbed model artifacts.",
diff --git a/src/basic_memory/repository/postgres_search_repository.py b/src/basic_memory/repository/postgres_search_repository.py
@@ -28,9 +28,6 @@
 from basic_memory.schemas.search import SearchItemType, SearchRetrievalMode
 
 
-POSTGRES_VECTOR_PREPARE_CONCURRENCY = 4
-
-
 def _strip_nul_from_row(row_data: dict) -> dict:
     """Strip NUL bytes from all string values in a row dict.
 
@@ -71,6 +68,9 @@ def __init__(
         self._semantic_embedding_sync_batch_size = (
             self._app_config.semantic_embedding_sync_batch_size
         )
+        self._semantic_postgres_prepare_concurrency = (
+            self._app_config.semantic_postgres_prepare_concurrency
+        )
         self._embedding_provider = embedding_provider
         self._vector_dimensions = 384
         self._vector_tables_initialized = False
@@ -503,8 +503,8 @@ async def sync_entity_vectors_batch(
 
         Trigger: cloud indexing uses Neon Postgres where network latency dominates
         thousands of per-entity prepare queries.
-        Why: preparing a small window of entities concurrently hides round-trip latency
-        without exhausting the tenant connection pool.
+        Why: preparing a small config-driven window of entities concurrently hides
+        round-trip latency without exhausting the tenant connection pool.
         Outcome: Postgres vector sync keeps the existing flush semantics while reducing
         wall-clock time on large cloud projects.
         """
@@ -527,7 +527,7 @@ async def sync_entity_vectors_batch(
             project_id=self.project_id,
             entities_total=total_entities,
             sync_batch_size=self._semantic_embedding_sync_batch_size,
-            prepare_concurrency=POSTGRES_VECTOR_PREPARE_CONCURRENCY,
+            prepare_concurrency=self._semantic_postgres_prepare_concurrency,
         )
 
         pending_jobs: list[_PendingEmbeddingJob] = []
@@ -536,9 +536,9 @@ async def sync_entity_vectors_batch(
         deferred_entity_ids: set[int] = set()
         synced_entity_ids: set[int] = set()
 
-        for window_start in range(0, total_entities, POSTGRES_VECTOR_PREPARE_CONCURRENCY):
+        for window_start in range(0, total_entities, self._semantic_postgres_prepare_concurrency):
             window_entity_ids = entity_ids[
-                window_start : window_start + POSTGRES_VECTOR_PREPARE_CONCURRENCY
+                window_start : window_start + self._semantic_postgres_prepare_concurrency
             ]
 
             if progress_callback is not None:
diff --git a/tests/repository/test_postgres_search_repository_unit.py b/tests/repository/test_postgres_search_repository_unit.py
@@ -40,6 +40,7 @@ def _make_repo(
     *,
     semantic_enabled: bool = False,
     embedding_provider=None,
+    semantic_postgres_prepare_concurrency: int = 4,
 ) -> PostgresSearchRepository:
     """Build a PostgresSearchRepository with a no-op session maker."""
     session_maker = MagicMock()
@@ -49,6 +50,7 @@ def _make_repo(
         default_project="test-project",
         database_backend=DatabaseBackend.POSTGRES,
         semantic_search_enabled=semantic_enabled,
+        semantic_postgres_prepare_concurrency=semantic_postgres_prepare_concurrency,
     )
     return PostgresSearchRepository(
         session_maker,
@@ -255,6 +257,7 @@ async def test_sync_entity_vectors_batch_prepares_entities_concurrently(self, mo
         repo = _make_repo(
             semantic_enabled=True,
             embedding_provider=StubEmbeddingProvider(),
+            semantic_postgres_prepare_concurrency=2,
         )
         repo._semantic_embedding_sync_batch_size = 8
         repo._vector_tables_initialized = True
@@ -283,7 +286,7 @@ async def _stub_prepare(entity_id: int) -> _PreparedEntityVectorSync:
         assert result.entities_total == 4
         assert result.entities_synced == 4
         assert result.entities_failed == 0
-        assert max_active_prepares > 1
+        assert max_active_prepares == 2
 
 
 @pytest.mark.asyncio
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -882,6 +882,22 @@ def test_semantic_embedding_dimensions_can_be_set(self):
         config = BasicMemoryConfig(semantic_embedding_dimensions=1536)
         assert config.semantic_embedding_dimensions == 1536
 
+    def test_semantic_postgres_prepare_concurrency_defaults_to_4(self):
+        """Postgres prepare concurrency should default to a conservative window of 4."""
+        config = BasicMemoryConfig()
+        assert config.semantic_postgres_prepare_concurrency == 4
+
+    def test_semantic_postgres_prepare_concurrency_validation(self):
+        """Postgres prepare concurrency must stay within the bounded safe range."""
+        config = BasicMemoryConfig(semantic_postgres_prepare_concurrency=8)
+        assert config.semantic_postgres_prepare_concurrency == 8
+
+        with pytest.raises(Exception):
+            BasicMemoryConfig(semantic_postgres_prepare_concurrency=0)
+
+        with pytest.raises(Exception):
+            BasicMemoryConfig(semantic_postgres_prepare_concurrency=17)
+
     def test_semantic_search_enabled_description_mentions_both_backends(self):
         """Description should not say 'SQLite only' anymore."""
         field_info = BasicMemoryConfig.model_fields["semantic_search_enabled"]