Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions libs/community/langchain_community/vectorstores/falkordb_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def generate_random_string(length: int) -> str:
return random_string


DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN_DISTANCE
DISTANCE_MAPPING = {
DistanceStrategy.EUCLIDEAN_DISTANCE: "euclidean",
DistanceStrategy.COSINE: "cosine",
Expand Down Expand Up @@ -384,7 +384,7 @@ def __init__(
DistanceStrategy.COSINE,
]:
raise ValueError(
"`distance_strategy` must be either 'EULIDEAN_DISTANCE` or `COSINE`"
"`distance_strategy` must be either `EUCLIDEAN_DISTANCE` or `COSINE`"
)

# Graph object takes precedence over env or input params
Expand Down Expand Up @@ -492,6 +492,9 @@ def retrieve_existing_node_index(
Check if the vector index exists in the FalkorDB database
and returns its embedding dimension, entity_type,
entity_label, entity_property

This version also validates the similarity_function against the configured
distance_strategy, so we don't silently reuse an index with the wrong distance metric.

This method:
1. queries the FalkorDB database for existing indexes
Expand Down Expand Up @@ -539,9 +542,20 @@ def retrieve_existing_node_index(
entity_type = str(dict["entity_type"])
entity_label = str(dict["entity_label"])
entity_property = str(dict["entity_property"])
similarity_function = dict.get("index_similarityFunction")
break
if embedding_dimension and entity_type and entity_label and entity_property:
self._index_type = IndexType(entity_type)
desired_sim = DISTANCE_MAPPING[self._distance_strategy]
if similarity_function and similarity_function != desired_sim:
raise ValueError(
f"Existing index on {entity_label}.{entity_property} "
f"uses similarity_function='{similarity_function}', "
f"but requested distance_strategy is '{self._distance_strategy}' "
f"({desired_sim}). "
"Drop/recreate the index or change the distance_strategy."
)

return embedding_dimension, entity_type, entity_label, entity_property
else:
return None, None, None, None
Expand Down Expand Up @@ -729,7 +743,7 @@ def create_new_index_on_relationship(
relation_type,
embedding_node_property,
dim=embedding_dimension,
similarity_function=DISTANCE_MAPPING[DEFAULT_DISTANCE_STRATEGY],
similarity_function=DISTANCE_MAPPING[self._distance_strategy],
)
except Exception as e:
if "already indexed" in str(e):
Expand Down Expand Up @@ -949,6 +963,7 @@ def __from(
metadatas: Optional[List[dict]] = None,
ids: Optional[List[str]] = None,
search_type: SearchType = SearchType.VECTOR,
distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
**kwargs: Any,
) -> FalkorDBVector:
if ids is None:
Expand All @@ -960,8 +975,10 @@ def __from(
store = cls(
embedding=embedding,
search_type=search_type,
distance_strategy=distance_strategy,
**kwargs,
)
)


# Check if the vector index already exists
embedding_dimension, index_type, entity_label, entity_property = (
Expand Down Expand Up @@ -1139,6 +1156,7 @@ def from_existing_graph(
*,
search_type: SearchType = DEFAULT_SEARCH_TYPE,
retrieval_query: str = "",
distance_strategy = DEFAULT_DISTANCE_STRATEGY,
**kwargs: Any,
) -> FalkorDBVector:
"""
Expand Down Expand Up @@ -1198,6 +1216,7 @@ def from_existing_graph(
retrieval_query=retrieval_query,
node_label=node_label,
embedding_node_property=embedding_node_property,
distance_strategy=distance_strategy,
**kwargs,
)

Expand Down Expand Up @@ -1444,15 +1463,19 @@ def similarity_search_with_score_by_vector(
f"n.{self.embedding_node_property} IS NOT NULL AND "
)

base_cosine_query = (
if self._distance_strategy == DistanceStrategy.COSINE:
base_distance_query = (
" WITH n as node, "
f" vec.cosineDistance(n.{self.embedding_node_property}"
", vecf32($embedding)) as score "
)

f" vec.cosineDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score "
)
else:
base_distance_query = (
" WITH n as node, "
f" vec.euclideanDistance(n.{self.embedding_node_property}, vecf32($embedding)) as score "
)
filter_snippets, filter_params = construct_metadata_filter(filter)

index_query = base_index_query + filter_snippets + base_cosine_query
index_query = base_index_query + filter_snippets + base_distance_query
else:
index_query = _get_search_index_query(self.search_type, self._index_type)
filter_params = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""
Unit tests for FalkorDBVector distance strategy handling.

These tests verify that the FalkorDB LangChain wrapper respects the
configured distance strategy when creating relationship indexes,
propagates a custom distance strategy when instantiating from
documents, and builds the correct distance function into the metadata
filter search query.

The tests use unittest.mock to avoid requiring a live FalkorDB
instance. They focus on the behaviour of the wrapper itself.
"""

from typing import Any, List
from unittest.mock import MagicMock

import pytest
from langchain_core.documents import Document

from langchain_community.vectorstores.falkordb_vector import (
FalkorDBVector,
IndexType,
)
from langchain_community.vectorstores.utils import DistanceStrategy


class DummyEmbeddings:
    """A minimal embeddings implementation for testing.

    Implements the two methods the tests need from an embeddings object
    and returns trivial fixed-size vectors, so the tests can run without
    access to external embedding models.
    """

    def __init__(self, size: int = 2) -> None:
        """Remember the dimension of every vector this stub produces."""
        self.size = size

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one vector per text.

        The vector for the text at 0-based position ``i`` has ``self.size``
        components, all equal to ``float(i + 1)``, so each document gets a
        distinct vector.
        """
        return [[float(rank)] * self.size for rank in range(1, len(texts) + 1)]

    def embed_query(self, text: str) -> List[float]:
        """Return an all-ones vector of length ``self.size`` for any query."""
        return [1.0] * self.size


def test_create_new_index_on_relationship_respects_strategy() -> None:
    """Ensure that create_new_index_on_relationship uses the configured metric.

    Both supported strategies are exercised: checking only one of them
    would pass vacuously whenever that one happens to be the module
    default, which is the regression this test is meant to catch.
    """
    expected_by_strategy = {
        DistanceStrategy.COSINE: "cosine",
        DistanceStrategy.EUCLIDEAN_DISTANCE: "euclidean",
    }

    for strategy, expected_metric in expected_by_strategy.items():
        # Fresh mocks per strategy so call counts do not leak between runs.
        fake_db = MagicMock()
        fake_graph = MagicMock()
        fake_graph._graph = fake_db
        fake_graph._driver = MagicMock()

        store = FalkorDBVector(
            embedding=DummyEmbeddings(),
            graph=fake_graph,
            relation_type="REL",
            embedding_node_property="embedding",
            embedding_dimension=2,
            distance_strategy=strategy,
        )

        store.create_new_index_on_relationship()

        # The underlying DB method must be called exactly once, with the
        # similarity function that matches the configured strategy.
        assert fake_db.create_edge_vector_index.call_count == 1
        _, kwargs = fake_db.create_edge_vector_index.call_args
        assert kwargs["similarity_function"] == expected_metric


def test_from_documents_propagates_distance_strategy() -> None:
    """Ensure that from_documents forwards distance_strategy to the store.

    Each supported strategy is passed explicitly, so the assertion is
    meaningful whichever strategy is the module default.
    """
    docs = [Document(page_content="alpha"), Document(page_content="beta")]

    for strategy in (
        DistanceStrategy.COSINE,
        DistanceStrategy.EUCLIDEAN_DISTANCE,
    ):
        # Fresh mocks per strategy keep the two constructions independent.
        fake_db = MagicMock()
        fake_graph = MagicMock()
        fake_graph._graph = fake_db
        fake_graph._driver = MagicMock()

        store = FalkorDBVector.from_documents(
            documents=docs,
            embedding=DummyEmbeddings(),
            graph=fake_graph,
            embedding_dimension=2,
            node_label="Test",
            distance_strategy=strategy,
        )

        assert store._distance_strategy == strategy


def _filtered_search_query(
    strategy: DistanceStrategy, embedding: List[float]
) -> str:
    """Run a metadata-filtered vector search on a mocked store.

    Builds a FalkorDBVector configured with ``strategy`` on top of mock
    graph objects, replaces its ``_query`` method with a recorder, runs
    ``similarity_search_with_score_by_vector`` with a metadata filter, and
    returns the query string that was handed to ``_query``.
    """
    fake_db = MagicMock()
    fake_graph = MagicMock()
    fake_graph._graph = fake_db
    fake_graph._driver = MagicMock()

    store = FalkorDBVector(
        embedding=DummyEmbeddings(),
        graph=fake_graph,
        node_label="Chunk",
        embedding_node_property="embedding",
        embedding_dimension=2,
        distance_strategy=strategy,
    )
    # Manually set index type for query construction.
    store._index_type = IndexType.NODE

    captured: dict[str, Any] = {}

    def fake_query(query: str, params: Any = None) -> List[Any]:
        # Record the query text; an empty result set is enough for the search.
        captured["query"] = query
        return []

    # Patch the _query method to capture the query string.
    store._query = fake_query  # type: ignore[assignment]

    store.similarity_search_with_score_by_vector(
        embedding=embedding, k=1, filter={"lang": "en"}
    )
    return captured["query"]


def test_similarity_search_with_score_by_vector_uses_correct_distance() -> None:
    """Ensure metadata-filtered vector search uses the correct distance function."""
    cosine_query = _filtered_search_query(DistanceStrategy.COSINE, [0.1, 0.2])
    assert "vec.cosineDistance" in cosine_query

    euclidean_query = _filtered_search_query(
        DistanceStrategy.EUCLIDEAN_DISTANCE, [0.3, 0.4]
    )
    assert "vec.euclideanDistance" in euclidean_query