Skip to content

Commit 4a0f5b0

Browse files
jsbattig and claude
committed
fix: add_index semantic rebuild uses --clear for full re-index (Bug #468, v9.5.27)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ac81a79 commit 4a0f5b0

5 files changed

Lines changed: 256 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## v9.5.27
9+
10+
### Bug Fixes
11+
12+
- fix: add_index semantic rebuild now uses --clear flag for full re-index (Bug #468)
13+
- Previously ran bare `cidx index` (incremental) which was a no-op on already-indexed repos
14+
- Now runs `cidx index --clear` to force full semantic vector regeneration
15+
816
## v9.5.26
917

1018
### Bug Fixes

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
AI-powered semantic code search for your codebase. Find code by meaning, not just keywords.
44

5-
**Version 9.5.26** - [Changelog](CHANGELOG.md) | [Migration Guide](docs/migration-to-v8.md) | [Architecture](docs/architecture.md)
5+
**Version 9.5.27** - [Changelog](CHANGELOG.md) | [Migration Guide](docs/migration-to-v8.md) | [Architecture](docs/architecture.md)
66

77
## Quick Navigation
88

src/code_indexer/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@
66
HNSW graph indexing (O(log N) complexity).
77
"""
88

9-
__version__ = "9.5.26"
9+
__version__ = "9.5.27"
1010
__author__ = "Seba Battig"

src/code_indexer/server/repositories/golden_repo_manager.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,9 +2419,10 @@ def background_worker() -> Dict[str, Any]:
24192419
if init_result.stderr:
24202420
captured_stderr += f"[init] {init_result.stderr}\n"
24212421

2422-
# semantic - execute cidx index (semantic embeddings only)
2422+
# semantic - execute cidx index --clear (force full rebuild)
2423+
# Bug #468: without --clear this is a no-op for already-indexed repos
24232424
if index_type == "semantic":
2424-
command = ["cidx", "index"]
2425+
command = ["cidx", "index", "--clear"]
24252426
result = subprocess.run(
24262427
command,
24272428
cwd=repo_path,
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
"""
2+
Unit tests for Bug #468: add_index semantic rebuild completes instantly without rebuilding.
3+
4+
Root cause: add_index_to_golden_repo runs ["cidx", "index"] without --clear for semantic
5+
index type, which is a no-op for already-indexed repos.
6+
7+
Fix: semantic must use ["cidx", "index", "--clear"] to force a full rebuild.
8+
9+
Acceptance criteria:
10+
- semantic index_type issues ["cidx", "index", "--clear"]
11+
- fts index_type issues ["cidx", "index", "--rebuild-fts-index"] (unchanged, verified)
12+
"""
13+
14+
from unittest.mock import MagicMock, patch
15+
16+
import pytest
17+
18+
from code_indexer.server.repositories.golden_repo_manager import GoldenRepoManager
19+
20+
21+
# ---------------------------------------------------------------------------
22+
# Helpers
23+
# ---------------------------------------------------------------------------
24+
25+
26+
def _make_success_run(repo_path):
27+
"""Return a subprocess.run mock that always succeeds."""
28+
29+
def mock_run(cmd, **kwargs):
30+
result = MagicMock()
31+
result.returncode = 0
32+
result.stdout = "ok"
33+
result.stderr = ""
34+
return result
35+
36+
return mock_run
37+
38+
39+
# ---------------------------------------------------------------------------
40+
# Fixtures
41+
# ---------------------------------------------------------------------------
42+
43+
44+
@pytest.fixture
def manager(tmp_path):
    """Provide a GoldenRepoManager whose data_dir is the per-test temp directory."""
    return GoldenRepoManager(data_dir=str(tmp_path))
49+
50+
51+
@pytest.fixture
def repo_path(tmp_path):
    """Create ``repos/test-repo`` under the temp directory and return its path."""
    fake_repo_dir = tmp_path.joinpath("repos", "test-repo")
    # parents=True because the intermediate "repos" directory does not exist yet.
    fake_repo_dir.mkdir(parents=True)
    return fake_repo_dir
57+
58+
59+
@pytest.fixture
def registered_manager(manager, repo_path):
    """Return ``manager`` with "test-repo" registered and a mocked job manager.

    The repo entry points at ``repo_path`` as its clone path, and
    ``background_job_manager`` is replaced by a MagicMock so tests can
    intercept ``submit_job``.
    """
    from datetime import datetime, timezone
    from code_indexer.server.repositories.golden_repo_manager import GoldenRepo

    manager.golden_repos["test-repo"] = GoldenRepo(
        alias="test-repo",
        repo_url="git@github.com:org/test-repo.git",
        clone_path=str(repo_path),
        default_branch="main",
        created_at=datetime.now(timezone.utc).isoformat(),
    )

    # background_job_manager is set from outside the manager (per the original
    # author's note it is not initialized in __init__), so the test must
    # supply one before add_index_to_golden_repo can call submit_job.
    manager.background_job_manager = MagicMock()
    return manager
78+
79+
80+
# ---------------------------------------------------------------------------
81+
# Tests
82+
# ---------------------------------------------------------------------------
83+
84+
85+
class TestAddIndexSemanticCommand:
    """Bug #468: semantic add_index must issue cidx index --clear."""

    def _capture_subprocess_calls(self, registered_manager, repo_path, index_type):
        """
        Call add_index_to_golden_repo and capture every subprocess.run invocation.

        The method submits a background job via BackgroundJobManager. We
        intercept submit_job to run the worker function synchronously so we can
        capture subprocess calls without threading complexity.

        Args:
            registered_manager: manager fixture with "test-repo" registered.
            repo_path: path returned by the patched get_actual_repo_path.
            index_type: value forwarded to add_index_to_golden_repo.

        Returns:
            The commands passed to subprocess.run, each as a list, in call order.
        """
        captured_cmds = []
        # Single source for the fake "successful" result, shared with the
        # module-level helper instead of rebuilding the mock by hand here.
        succeed = _make_success_run(str(repo_path))

        def fake_submit_job(operation_type, func, **kwargs):
            """Run the background worker synchronously and capture subprocess calls."""
            collected = []

            def recording_run(cmd, **kw):
                # Copy the command so later mutation by the caller cannot
                # change what was recorded.
                collected.append(list(cmd))
                return succeed(cmd, **kw)

            # side_effect is set once, directly to the recorder; also patch
            # get_actual_repo_path so it returns our temp path.
            with patch("subprocess.run", side_effect=recording_run), patch.object(
                registered_manager,
                "get_actual_repo_path",
                return_value=str(repo_path),
            ):
                func()
            # Only publish the commands once the worker finished without raising.
            captured_cmds.extend(collected)
            return "fake-job-id"

        with patch.object(
            registered_manager.background_job_manager,
            "submit_job",
            side_effect=fake_submit_job,
        ):
            registered_manager.add_index_to_golden_repo(
                alias="test-repo",
                index_type=index_type,
            )

        return captured_cmds

    def test_semantic_index_uses_clear_flag(self, registered_manager, repo_path):
        """
        Bug #468: cidx index for semantic rebuild must include --clear.

        Without --clear, cidx index is incremental and is a no-op for an
        already-indexed repository. A rebuild must wipe and re-embed.
        """
        cmds = self._capture_subprocess_calls(registered_manager, repo_path, "semantic")

        # Find the cidx index command (not the cidx init command)
        index_cmds = [c for c in cmds if c[:2] == ["cidx", "index"]]
        assert index_cmds, (
            "No 'cidx index' command was issued for semantic index type. "
            f"All captured commands: {cmds}"
        )

        semantic_cmd = index_cmds[0]
        assert "--clear" in semantic_cmd, (
            f"Bug #468: 'cidx index' for semantic must include '--clear' to force "
            f"full rebuild. Got command: {semantic_cmd}. "
            "Without --clear the command is a no-op for already-indexed repos."
        )

    def test_semantic_index_does_not_use_rebuild_fts_flag(
        self, registered_manager, repo_path
    ):
        """Semantic rebuild must NOT include --rebuild-fts-index (that is FTS only)."""
        cmds = self._capture_subprocess_calls(registered_manager, repo_path, "semantic")

        index_cmds = [c for c in cmds if c[:2] == ["cidx", "index"]]
        assert index_cmds, f"No 'cidx index' command captured. All commands: {cmds}"

        semantic_cmd = index_cmds[0]
        assert "--rebuild-fts-index" not in semantic_cmd, (
            f"Semantic rebuild must not include --rebuild-fts-index. "
            f"Got: {semantic_cmd}"
        )
173+
174+
175+
class TestAddIndexFtsCommand:
    """Verify fts index_type is unchanged and uses --rebuild-fts-index (not --clear)."""

    def _capture_subprocess_calls(self, registered_manager, repo_path, index_type):
        """
        Run add_index_to_golden_repo inline and return the subprocess.run commands.

        submit_job on the manager's background_job_manager is replaced by a
        stand-in that executes the worker immediately, with subprocess.run and
        get_actual_repo_path patched. Each command is returned as a list, in
        call order.
        """
        seen = []

        def run_job_inline(operation_type, func, **kwargs):
            this_job = []

            def record(cmd, **kw):
                this_job.append(list(cmd))
                # Every command "succeeds" with fixed output.
                return MagicMock(returncode=0, stdout="ok", stderr="")

            run_patch = patch("subprocess.run", side_effect=record)
            path_patch = patch.object(
                registered_manager,
                "get_actual_repo_path",
                return_value=str(repo_path),
            )
            with run_patch, path_patch:
                func()
            # Reached only when the worker returned without raising.
            seen.extend(this_job)
            return "fake-job-id"

        submit_patch = patch.object(
            registered_manager.background_job_manager,
            "submit_job",
            side_effect=run_job_inline,
        )
        with submit_patch:
            registered_manager.add_index_to_golden_repo(
                alias="test-repo",
                index_type=index_type,
            )

        return seen

    def test_fts_index_uses_rebuild_fts_index_flag(self, registered_manager, repo_path):
        """
        The first cidx index command for index_type "fts" carries --rebuild-fts-index.

        Guards the existing fts behavior against accidental change.
        """
        cmds = self._capture_subprocess_calls(registered_manager, repo_path, "fts")

        # Keep only "cidx index ..." invocations; other cidx commands are ignored.
        index_cmds = list(filter(lambda c: c[:2] == ["cidx", "index"], cmds))
        assert (
            index_cmds
        ), f"No 'cidx index' command issued for fts index type. Commands: {cmds}"

        fts_cmd = index_cmds[0]
        assert (
            "--rebuild-fts-index" in fts_cmd
        ), f"FTS index must use '--rebuild-fts-index'. Got: {fts_cmd}"

    def test_fts_index_does_not_use_clear_flag(self, registered_manager, repo_path):
        """The first cidx index command for index_type "fts" must not carry --clear."""
        cmds = self._capture_subprocess_calls(registered_manager, repo_path, "fts")

        index_cmds = list(filter(lambda c: c[:2] == ["cidx", "index"], cmds))
        assert index_cmds, f"No 'cidx index' command captured. Commands: {cmds}"

        fts_cmd = index_cmds[0]
        assert (
            "--clear" not in fts_cmd
        ), f"FTS rebuild must not include --clear. Got: {fts_cmd}"

0 commit comments

Comments
 (0)