Skip to content

Commit 2c32ffa

Browse files
authored
fix(mental-models): add tags_match and tag_groups to trigger config (#786) (#804)
When a mental model has tags, refresh_mental_model hardcoded tags_match="all_strict", causing empty results when most memories are untagged. Add configurable tags_match and tag_groups fields to MentalModelTrigger so users can control refresh filtering.

- Add tags_match (any/all/any_strict/all_strict) to override default
- Add tag_groups for compound boolean tag expressions during refresh
- Default behavior unchanged (all_strict when tags present)
- Update both refresh paths (task-based and direct)
- Add UI controls in Create/Update mental model dialogs
- Regenerate OpenAPI spec and client SDKs
1 parent baf5447 commit 2c32ffa

49 files changed

Lines changed: 3747 additions & 549 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

hindsight-api-slim/hindsight_api/api/http.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1502,6 +1502,23 @@ class MentalModelTrigger(BaseModel):
15021502
default=None,
15031503
description="Exclude specific mental models by ID from the reflect loop.",
15041504
)
1505+
tags_match: TagsMatch | None = Field(
1506+
default=None,
1507+
description=(
1508+
"Override how the model's tags filter memories during refresh. "
1509+
"If not set, defaults to 'all_strict' when the model has tags (security isolation) "
1510+
"or 'any' when the model has no tags. "
1511+
"Set to 'any' to include untagged memories alongside tagged ones during refresh."
1512+
),
1513+
)
1514+
tag_groups: list[TagGroup] | None = Field(
1515+
default=None,
1516+
description=(
1517+
"Compound boolean tag expressions to use during refresh instead of the model's own tags. "
1518+
"When set, these tag groups are passed to reflect and the model's flat tags are NOT used for filtering. "
1519+
"Supports nested and/or/not expressions for complex tag-based scoping."
1520+
),
1521+
)
15051522

15061523
@field_validator("fact_types")
15071524
@classmethod

hindsight-api-slim/hindsight_api/engine/memory_engine.py

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import time
1717
import uuid
1818
from collections.abc import Awaitable, Callable
19+
from dataclasses import dataclass
1920
from datetime import UTC, datetime, timedelta, timezone
2021
from typing import TYPE_CHECKING, Any
2122

@@ -226,6 +227,42 @@ def _get_tiktoken_encoding():
226227
return _TIKTOKEN_ENCODING
227228

228229

230+
@dataclass(frozen=True)
231+
class RefreshTagFiltering:
232+
"""Resolved tag filtering parameters for mental model refresh."""
233+
234+
tags: list[str] | None
235+
tags_match: TagsMatch
236+
tag_groups: list[TagGroup] | None
237+
238+
239+
def _resolve_refresh_tag_filtering(
240+
model_tags: list[str] | None,
241+
trigger_data: dict[str, Any],
242+
) -> RefreshTagFiltering:
243+
"""Resolve tag filtering parameters for mental model refresh.
244+
245+
Takes raw trigger dict from DB (JSONB with no fixed schema guarantee)
246+
and resolves the tag filtering to use during reflect.
247+
248+
Priority:
249+
- If trigger has tag_groups, use those (overrides flat tags entirely)
250+
- If trigger has tags_match, use model's tags with that match mode
251+
- Otherwise default to all_strict when tags present (security isolation)
252+
"""
253+
trigger_tag_groups = trigger_data.get("tag_groups")
254+
if trigger_tag_groups is not None:
255+
from pydantic import TypeAdapter
256+
257+
adapter = TypeAdapter(TagGroup)
258+
parsed = [adapter.validate_python(tg) for tg in trigger_tag_groups]
259+
return RefreshTagFiltering(tags=None, tags_match="any", tag_groups=parsed)
260+
261+
trigger_tags_match = trigger_data.get("tags_match")
262+
tags_match: TagsMatch = trigger_tags_match if trigger_tags_match else ("all_strict" if model_tags else "any")
263+
return RefreshTagFiltering(tags=model_tags, tags_match=tags_match, tag_groups=None)
264+
265+
229266
class MemoryEngine(MemoryEngineInterface):
230267
"""
231268
Advanced memory system using temporal and semantic linking with PostgreSQL.
@@ -908,26 +945,23 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
908945

909946
source_query = mental_model["source_query"]
910947

911-
# SECURITY: If the mental model has tags, pass them to reflect with "all_strict" matching
912-
# to ensure it can only access other mental models/memories with the SAME tags.
913-
# This prevents cross-tenant/cross-user information leakage by excluding untagged content.
914-
tags = mental_model.get("tags")
915-
tags_match = "all_strict" if tags else "any"
916-
917948
# Read reflect options from trigger (if stored)
918949
trigger_data = mental_model.get("trigger") or {}
919950
fact_types = trigger_data.get("fact_types")
920951
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
921952
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
922953

954+
tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)
955+
923956
# Run reflect to generate new content, excluding the mental model being refreshed
924957
# Always add self to excluded IDs to prevent circular reference
925958
reflect_result = await self.reflect_async(
926959
bank_id=bank_id,
927960
query=source_query,
928961
request_context=internal_context,
929-
tags=tags,
930-
tags_match=tags_match,
962+
tags=tag_filtering.tags,
963+
tags_match=tag_filtering.tags_match,
964+
tag_groups=tag_filtering.tag_groups,
931965
fact_types=fact_types,
932966
exclude_mental_models=exclude_mental_models,
933967
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),
@@ -6581,26 +6615,23 @@ async def refresh_mental_model(
65816615

65826616
# Create parent span for mental model refresh operation
65836617
with create_operation_span("mental_model_refresh", bank_id):
6584-
# SECURITY: If the mental model has tags, pass them to reflect with "all_strict" matching
6585-
# to ensure it can only access other mental models/memories with the SAME tags.
6586-
# This prevents cross-tenant/cross-user information leakage by excluding untagged content.
6587-
tags = mental_model.get("tags")
6588-
tags_match = "all_strict" if tags else "any"
6589-
65906618
# Read reflect options from trigger (if stored)
65916619
trigger_data = mental_model.get("trigger") or {}
65926620
fact_types = trigger_data.get("fact_types")
65936621
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
65946622
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
65956623

6624+
tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)
6625+
65966626
# Run reflect with the source query, excluding the mental model being refreshed
65976627
# Skip creating a nested "hindsight.reflect" span since we already have "hindsight.mental_model_refresh"
65986628
reflect_result = await self.reflect_async(
65996629
bank_id=bank_id,
66006630
query=mental_model["source_query"],
66016631
request_context=request_context,
6602-
tags=tags,
6603-
tags_match=tags_match,
6632+
tags=tag_filtering.tags,
6633+
tags_match=tag_filtering.tags_match,
6634+
tag_groups=tag_filtering.tag_groups,
66046635
fact_types=fact_types,
66056636
exclude_mental_models=exclude_mental_models,
66066637
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),

0 commit comments

Comments
 (0)