Skip to content

Commit 06c8fc3

Browse files
authored
Merge pull request #24 from riteshverma/feature/rag-debug-reasoning
feat: add configurable RAG rerank and RemMe lifecycle controls
2 parents 0ce5ab6 + a8aac93 commit 06c8fc3

10 files changed

Lines changed: 1440 additions & 31 deletions

File tree

config/settings.defaults.json

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,16 @@
2929
"chunk_overlap": 40,
3030
"max_chunk_length": 512,
3131
"semantic_word_limit": 1024,
32-
"top_k": 3
32+
"top_k": 3,
33+
"rerank": {
34+
"enabled": false,
35+
"provider": "local_oss",
36+
"model": "",
37+
"candidate_k": 40,
38+
"top_k": null,
39+
"timeout_seconds": 8.0,
40+
"batch_size": 8
41+
}
3342
},
3443
"agent": {
3544
"model_provider": "gemini",
@@ -63,6 +72,53 @@
6372
"read_from_bridge": false,
6473
"mirror_dir": "memory/gbrain_bridge",
6574
"server_id": "gbrain"
75+
},
76+
"policy": {
77+
"enabled": true,
78+
"write": {
79+
"max_text_length": 2000,
80+
"blocked_patterns": [],
81+
"allowed_sources": [],
82+
"denied_sources": [],
83+
"allowed_categories": [],
84+
"denied_categories": [],
85+
"default_ttl_seconds": null,
86+
"source_ttl_overrides": {
87+
"run_*": 1209600,
88+
"manual_scan_*": 604800,
89+
"backfill_*": 259200
90+
}
91+
},
92+
"read": {
93+
"allowed_sources": [],
94+
"denied_sources": [],
95+
"allowed_categories": [],
96+
"denied_categories": [],
97+
"exclude_expired": true,
98+
"requester_overrides": {
99+
"run_context": {
100+
"allowed_sources": [
101+
"manual",
102+
"run_*"
103+
],
104+
"denied_sources": [
105+
"manual_scan_*",
106+
"backfill_*"
107+
],
108+
"allowed_categories": [],
109+
"denied_categories": []
110+
},
111+
"smart_scan": {
112+
"allowed_sources": [
113+
"manual",
114+
"run_*"
115+
],
116+
"denied_sources": [],
117+
"allowed_categories": [],
118+
"denied_categories": []
119+
}
120+
}
121+
}
66122
}
67123
},
68124
"gemini": {

config/settings.json

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,16 @@
1717
"chunk_overlap": 40,
1818
"max_chunk_length": 512,
1919
"semantic_word_limit": 1024,
20-
"top_k": 3
20+
"top_k": 3,
21+
"rerank": {
22+
"enabled": true,
23+
"provider": "local_oss",
24+
"model": "",
25+
"candidate_k": 40,
26+
"top_k": null,
27+
"timeout_seconds": 8.0,
28+
"batch_size": 8
29+
}
2130
},
2231
"agent": {
2332
"model_provider": "ollama",
@@ -44,7 +53,54 @@
4453
"test_agent_model": "gemma3:4b"
4554
},
4655
"remme": {
47-
"extraction_prompt": "You are a Contextual Memory Management AI.\nYour job is to update the \"Memory Vault\" based on the latest conversation.\n\nRULES:\n1. ANTI-FRAGMENTATION: NEVER split related items into separate facts. Merge them into ONE rich, coherent memory entry.\n2. NO REDUNDANCY: If info is already captured, do nothing unless you have NEW details (use \"update\").\n3. CONTEXTUAL HUBS: Prefer a single \"Hub\" memory over separate atomic facts; emit it as one entry in \"memories\" (see OUTPUT FORMAT).\n4. NO NEGATIVE FACTS: NEVER store \"not found\" or \"missing\" info.\n5. NO META-LOGS: Do not store internal reasoning or agent traces.\n6. HIGH SALIENCE ONLY: Focus on project decisions, user preferences, architectural details.\n7. ACTIONS: \"add\" for new facts, \"update\" to expand existing, \"delete\" if proven false.\n\nOUTPUT FORMAT: Respond with JSON only: {\"memories\": [{\"action\": \"add\"|\"update\"|\"delete\", \"text\": \"...\", \"id\": \"...\" only for update/delete}], \"preferences\": {...}}. If nothing to store: {\"memories\": [], \"preferences\": {}}."
56+
"extraction_prompt": "You are a Contextual Memory Management AI.\nYour job is to update the \"Memory Vault\" based on the latest conversation.\n\nRULES:\n1. ANTI-FRAGMENTATION: NEVER split related items into separate facts. Merge them into ONE rich, coherent memory entry.\n2. NO REDUNDANCY: If info is already captured, do nothing unless you have NEW details (use \"update\").\n3. CONTEXTUAL HUBS: Prefer a single \"Hub\" memory over separate atomic facts; emit it as one entry in \"memories\" (see OUTPUT FORMAT).\n4. NO NEGATIVE FACTS: NEVER store \"not found\" or \"missing\" info.\n5. NO META-LOGS: Do not store internal reasoning or agent traces.\n6. HIGH SALIENCE ONLY: Focus on project decisions, user preferences, architectural details.\n7. ACTIONS: \"add\" for new facts, \"update\" to expand existing, \"delete\" if proven false.\n\nOUTPUT FORMAT: Respond with JSON only: {\"memories\": [{\"action\": \"add\"|\"update\"|\"delete\", \"text\": \"...\", \"id\": \"...\" only for update/delete}], \"preferences\": {...}}. If nothing to store: {\"memories\": [], \"preferences\": {}}.",
57+
"policy": {
58+
"enabled": true,
59+
"write": {
60+
"max_text_length": 2000,
61+
"blocked_patterns": [],
62+
"allowed_sources": [],
63+
"denied_sources": [],
64+
"allowed_categories": [],
65+
"denied_categories": [],
66+
"default_ttl_seconds": null,
67+
"source_ttl_overrides": {
68+
"run_*": 1209600,
69+
"manual_scan_*": 604800,
70+
"backfill_*": 259200
71+
}
72+
},
73+
"read": {
74+
"allowed_sources": [],
75+
"denied_sources": [],
76+
"allowed_categories": [],
77+
"denied_categories": [],
78+
"exclude_expired": true,
79+
"requester_overrides": {
80+
"run_context": {
81+
"allowed_sources": [
82+
"manual",
83+
"run_*"
84+
],
85+
"denied_sources": [
86+
"manual_scan_*",
87+
"backfill_*"
88+
],
89+
"allowed_categories": [],
90+
"denied_categories": []
91+
},
92+
"smart_scan": {
93+
"allowed_sources": [
94+
"manual",
95+
"run_*"
96+
],
97+
"denied_sources": [],
98+
"allowed_categories": [],
99+
"denied_categories": []
100+
}
101+
}
102+
}
103+
}
48104
},
49105
"gemini": {
50106
"api_key_env": "GEMINI_API_KEY"

config/settings_loader.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,53 @@ def get_embedding_provider() -> str:
403403
return provider or "ollama"
404404

405405

406+
def get_rag_rerank_settings() -> dict:
    """Get normalized RAG reranker settings with safe defaults.

    Reads the ``rag.rerank`` section of the loaded settings and coerces each
    field to a predictable type; missing or malformed values fall back to
    the defaults listed below.

    Returns:
        A dict with the keys:
            enabled (bool): False when unset. Textual flags such as
                "false", "0", "no" and "off" are treated as False.
            provider (str): lower-cased provider name, "local_oss" when
                unset; "none", "disabled" and "off" are mapped to "noop".
            model (str): stripped model identifier, "" when unset.
            candidate_k (int): positive int, default 40.
            top_k (int | None): positive int, or None when unset/invalid.
            timeout_seconds (float): positive finite float, default 8.0.
            batch_size (int): positive int, default 8.
    """
    rag_settings = load_settings().get("rag", {})
    rerank = rag_settings.get("rerank", {}) if isinstance(rag_settings, dict) else {}
    if not isinstance(rerank, dict):
        rerank = {}

    def _flag(value) -> bool:
        # bool("false") is True, so textual flags need explicit handling.
        if isinstance(value, str):
            return value.strip().lower() not in {"", "0", "false", "no", "off"}
        return bool(value)

    def _optional_positive_int(value):
        # bool is an int subclass; `true` in place of a count is a config
        # mistake, not a request for 1.
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, str) and not value.strip():
            return None
        try:
            parsed = int(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")), e.g. JSON `1e999`/`Infinity`.
            return None
        return parsed if parsed > 0 else None

    def _positive_int(value, default: int) -> int:
        parsed = _optional_positive_int(value)
        return default if parsed is None else parsed

    def _positive_float(value, default: float) -> float:
        if isinstance(value, bool):
            return default
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: float() of an int too large to represent.
            return default
        # The chained comparison rejects NaN (all comparisons are False),
        # non-positive values and +inf in one step.
        return parsed if 0 < parsed < float("inf") else default

    provider = str(rerank.get("provider", "local_oss") or "local_oss").strip().lower()
    if provider in {"none", "disabled", "off"}:
        provider = "noop"

    return {
        "enabled": _flag(rerank.get("enabled", False)),
        "provider": provider,
        "model": str(rerank.get("model", "") or "").strip(),
        "candidate_k": _positive_int(rerank.get("candidate_k"), 40),
        "top_k": _optional_positive_int(rerank.get("top_k")),
        "timeout_seconds": _positive_float(rerank.get("timeout_seconds"), 8.0),
        "batch_size": _positive_int(rerank.get("batch_size"), 8),
    }
451+
452+
406453
def get_timeout() -> int:
    """Return the Ollama timeout (seconds) from the loaded settings."""
    ollama_section = load_settings()["ollama"]
    return ollama_section["timeout"]

0 commit comments

Comments
 (0)