 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

+from collections.abc import Iterable
 from dataclasses import dataclass
-from typing import NamedTuple
+from typing import Callable

 import numpy as np

 from .embeddings import AsyncEmbeddingModel, NormalizedEmbedding, NormalizedEmbeddings


+@dataclass
+class Scored:
+    item: int
+    score: float
+
+
 @dataclass
 class TextEmbeddingIndexSettings:
     embedding_model: AsyncEmbeddingModel
@@ -35,11 +42,6 @@ def __init__(
         self.max_matches = max_matches


-class ScoredOrdinal(NamedTuple):
-    ordinal: int
-    score: float
-
-
 class VectorBase:
     _vectors: NormalizedEmbeddings

@@ -79,6 +81,11 @@ def add_embedding(self, key: str | None, embedding: NormalizedEmbedding) -> None
         if key is not None:
             self._model.add_embedding(key, embedding)

+    def add_embeddings(self, embeddings: NormalizedEmbeddings) -> None:
+        assert embeddings.ndim == 2
+        assert embeddings.shape[1] == self._embedding_size
+        self._vectors = np.concatenate((self._vectors, embeddings), axis=0)
+
     async def add_key(self, key: str, cache: bool = True) -> None:
         embeddings = (await self.get_embedding(key, cache=cache)).reshape(
             1, -1
@@ -89,29 +96,64 @@ async def add_keys(self, keys: list[str], cache: bool = True) -> None:
         embeddings = await self.get_embeddings(keys, cache=cache)
         self._vectors = np.concatenate((self._vectors, embeddings), axis=0)

-    async def fuzzy_lookup(
-        self, key: str, max_hits: int | None = None, min_score: float | None = None
-    ) -> list[ScoredOrdinal]:
+    def fuzzy_lookup_embedding(
+        self,
+        embedding: NormalizedEmbedding,
+        max_hits: int | None = None,
+        min_score: float | None = None,
+        predicate: Callable[[int], bool] | None = None,
+    ) -> list[Scored]:
         if max_hits is None:
             max_hits = 10
         if min_score is None:
             min_score = 0.0
-        embedding = await self.get_embedding(key)
-        scores = np.dot(self._vectors, embedding)  # This does most of the work
+        # This line does most of the work:
+        scores: Iterable[float] = np.dot(self._vectors, embedding)
         scored_ordinals = [
-            ScoredOrdinal(i, float(score))
+            Scored(i, score)
             for i, score in enumerate(scores)
-            if score >= min_score
+            if score >= min_score and (predicate is None or predicate(i))
         ]
         scored_ordinals.sort(key=lambda x: x.score, reverse=True)
         return scored_ordinals[:max_hits]

+    # TODO: Make this and fuzzy_lookup_embedding() more similar.
+    def fuzzy_lookup_embedding_in_subset(
+        self,
+        embedding: NormalizedEmbedding,
+        ordinals_of_subset: list[int],
+        max_hits: int | None = None,
+        min_score: float | None = None,
+    ) -> list[Scored]:
+        return self.fuzzy_lookup_embedding(
+            embedding, max_hits, min_score, lambda i: i in ordinals_of_subset
+        )
+
+    async def fuzzy_lookup(
+        self,
+        key: str,
+        max_hits: int | None = None,
+        min_score: float | None = None,
+        predicate: Callable[[int], bool] | None = None,
+    ) -> list[Scored]:
+        embedding = await self.get_embedding(key)
+        return self.fuzzy_lookup_embedding(
+            embedding, max_hits=max_hits, min_score=min_score, predicate=predicate
+        )
+
     def clear(self) -> None:
         self._vectors = np.array([], dtype=np.float32)
         self._vectors.shape = (0, self._embedding_size)

-    def serialize_embedding_at(self, ordinal: int) -> NormalizedEmbedding | None:
-        return self._vectors[ordinal] if 0 <= ordinal < len(self._vectors) else None
+    def get_embedding_at(self, pos: int) -> NormalizedEmbedding:
+        if 0 <= pos < len(self._vectors):
+            return self._vectors[pos]
+        raise IndexError(
+            f"Index {pos} out of bounds for embedding index of size {len(self)}"
+        )
+
+    def serialize_embedding_at(self, pos: int) -> NormalizedEmbedding | None:
+        return self._vectors[pos] if 0 <= pos < len(self._vectors) else None

     def serialize(self) -> NormalizedEmbeddings:
         assert self._vectors.shape == (len(self._vectors), self._embedding_size)
@@ -181,7 +223,7 @@ def debugv(heading: str):
     log("\nFuzzy lookups:")
     for word in words + ["pancakes", "hello world", "book", "author"]:
         neighbors = await v.fuzzy_lookup(word, max_hits=3)
-        log(f"{word}:", [(nb.ordinal, nb.score) for nb in neighbors])
+        log(f"{word}:", [(nb.item, nb.score) for nb in neighbors])


 if __name__ == "__main__":
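A minimal usage sketch of the new lookup API, assuming an already-populated VectorBase instance and an async context (construction and event-loop setup are not shown in this diff; the word "fruit" and the ordinals are illustrative):

    async def demo(v: VectorBase) -> None:
        # Plain key lookup: returns Scored(item, score) entries, best first.
        hits = await v.fuzzy_lookup("fruit", max_hits=3, min_score=0.5)
        print([(h.item, h.score) for h in hits])
        # Restrict the candidate set to specific row ordinals.
        emb = await v.get_embedding("fruit")
        subset_hits = v.fuzzy_lookup_embedding_in_subset(emb, [0, 2, 5], max_hits=3)
        print([(s.item, s.score) for s in subset_hits])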