Skip to content
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
51ce1e0
Abstract full-text query construction into helper class
vishal-bala Dec 4, 2025
9b7283b
Remove unused imports
vishal-bala Dec 4, 2025
99f9d99
Formatting
vishal-bala Dec 4, 2025
527b024
Implement `HybridQuery` with tests
vishal-bala Dec 4, 2025
6c0edd7
Implement vsim search method params and vsim filtering in `HybridQuery`
vishal-bala Dec 4, 2025
da2283e
Update `redisvl.query.aggregate.HybridQuery` deprecation message
vishal-bala Dec 4, 2025
80f1927
Add support for combination methods and postprocessing
vishal-bala Dec 5, 2025
9832369
Update hybrid search usage based on in-practice constraints
vishal-bala Dec 5, 2025
b691255
Update/fix existing tests
vishal-bala Dec 5, 2025
4b3a1fe
Implement async hybrid search
vishal-bala Dec 5, 2025
4d3ba70
Update docstrings
vishal-bala Dec 5, 2025
093fbde
Update GH Actions test configuration to include Redis 8.4.0 and redis…
vishal-bala Dec 5, 2025
1c0e77f
Update uv.lock
vishal-bala Dec 5, 2025
6fdf59a
Python 3.9 compatibility fixes
vishal-bala Dec 5, 2025
900073a
Fix method reference
vishal-bala Dec 5, 2025
e1e261d
Catch `ModuleNotFoundError` as well
vishal-bala Dec 8, 2025
cee1fba
Standardize test skip reason
vishal-bala Dec 8, 2025
cdba22f
Update expected number of results to hybrid search default
vishal-bala Dec 8, 2025
3173502
Remove ambiguous `redisvl.query` import for `HybridQuery`
vishal-bala Dec 8, 2025
ecd5c1b
Update docs
vishal-bala Dec 8, 2025
bf87c6a
Update imports
vishal-bala Dec 8, 2025
e44c439
Fix test skipping logic (for Python 3.9 issues)
vishal-bala Dec 8, 2025
7210aae
Re-add additional test skipping logic
vishal-bala Dec 8, 2025
3b6ef3a
Oops missed one
vishal-bala Dec 8, 2025
9c5fb4b
Remove deprecated `HybridQuery` class
vishal-bala Dec 8, 2025
8d4e91d
Manage dependency logic for user guide notebook
vishal-bala Dec 8, 2025
362fbf5
Make `hybrid_search` always available but validate and raise errors
vishal-bala Dec 9, 2025
eb8e3ae
Add warning note about inconsistent linear combination definitions be…
vishal-bala Dec 9, 2025
5109c7a
Validate that `hybrid_search` method exists
vishal-bala Dec 9, 2025
1a71bef
Fix error message check
vishal-bala Dec 9, 2025
9acdaab
Add See Also references [skip ci]
vishal-bala Dec 9, 2025
eb49885
Ditch Redis 8.0.2 from testing matrix
vishal-bala Dec 10, 2025
d8accf5
Reflect query syntax in hybrid query docs note
vishal-bala Dec 10, 2025
58e8da6
Document that default combination method is RRF
vishal-bala Dec 10, 2025
ca256e8
Update docs overview to show "Python >=3.9", not 3.8
vishal-bala Dec 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ jobs:
matrix:
# 3.11 tests are run in the service-tests job
python-version: ["3.9", "3.10", "3.12", "3.13"]
redis-py-version: ["5.x", "6.x"]
redis-version: ["6.2.6-v9", "latest", "8.0.2"]
redis-py-version: ["5.x", "6.x", "7.x"]
redis-version: ["6.2.6-v9", "latest", "8.0.2", "8.4.0"]
steps:
- name: Check out repository
uses: actions/checkout@v4
Expand Down Expand Up @@ -130,13 +130,15 @@ jobs:
# Install right redis version based on redis py
if [[ "${{ matrix.redis-py-version }}" == "5.x" ]]; then
uv pip install "redis>=5,<6"
else
elif [[ "${{ matrix.redis-py-version }}" == "6.x" ]]; then
uv pip install "redis>=6,<7"
else
uv pip install "redis>=7,<8"
fi

- name: Set Redis image name
run: |
if [[ "${{ matrix.redis-version }}" == "8.0.2" ]]; then
if [[ "${{ matrix.redis-version }}" == "8.0.2" || "${{ matrix.redis-version }}" == "8.4.0" ]]; then
echo "REDIS_IMAGE=redis:${{ matrix.redis-version }}" >> $GITHUB_ENV
else
echo "REDIS_IMAGE=redis/redis-stack-server:${{ matrix.redis-version }}" >> $GITHUB_ENV
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
dependencies = [
"numpy>=1.26.0,<3",
"pyyaml>=5.4,<7.0",
"redis>=5.0,<7.0",
"redis>=5.0,<7.2",
"pydantic>=2,<3",
"tenacity>=8.2.2",
"ml-dtypes>=0.4.0,<1.0.0",
Expand Down
120 changes: 120 additions & 0 deletions redisvl/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,18 @@
from redis.commands.search.aggregation import AggregateResult
from redis.commands.search.document import Document
from redis.commands.search.result import Result

from redisvl.query.query import BaseQuery

try:
from redis.commands.search.hybrid_result import HybridResult

from redisvl.query.hybrid import HybridQuery

REDIS_HYBRID_AVAILABLE = True
except ImportError:
REDIS_HYBRID_AVAILABLE = False

from redis import __version__ as redis_version
from redis.client import NEVER_DECODE

Expand Down Expand Up @@ -215,6 +225,13 @@ def _process(row):
return [_process(r) for r in results.rows]


if REDIS_HYBRID_AVAILABLE:

def process_hybrid_results(results: HybridResult) -> List[Dict[str, Any]]:
    """Turn a hybrid search result object into a list of document dictionaries.

    Args:
        results: The hybrid result object; every entry of its ``results``
            attribute is passed through ``convert_bytes``.

    Returns:
        List[Dict[str, Any]]: One converted entry per item in ``results.results``,
        in the same order.
    """
    raw_entries = results.results
    return list(map(convert_bytes, raw_entries))


class BaseSearchIndex:
"""Base search engine class"""

Expand Down Expand Up @@ -1003,6 +1020,56 @@ def search(self, *args, **kwargs) -> "Result":
except Exception as e:
raise RedisSearchError(f"Unexpected error while searching: {str(e)}") from e

if REDIS_HYBRID_AVAILABLE:

def hybrid_search(self, query: HybridQuery, **kwargs) -> List[Dict[str, Any]]:
    """Run a combined text + vector (hybrid) search against the index.

    Args:
        query: The hybrid query to execute. Its ``query``, ``combination_method``
            and ``postprocessing_config`` attributes are forwarded to redis-py.
        kwargs: Extra keyword arguments handed straight to the redis-py
            ``hybrid_search`` method (e.g. timeout).

    Returns:
        List[Dict[str, Any]]: The search results ordered by combined score unless
        otherwise specified.

    Raises:
        NotImplementedError: If the redis-py search client does not expose a
            ``hybrid_search`` method.

    Notes:
        Hybrid search is only available in Redis 8.4.0+, and requires redis-py >= 7.1.0.

    .. code-block:: python

        from redisvl.query.hybrid import HybridQuery

        hybrid_query = HybridQuery(
            text="lorem ipsum dolor sit amet",
            text_field_name="description",
            vector=[0.1, 0.2, 0.3],
            vector_field_name="embedding"
        )

        results = index.hybrid_search(hybrid_query)

    """
    search_client = self._redis_client.ft(self.schema.index.name)

    # Guard: installed redis-py builds may predate the hybrid search API.
    if not hasattr(search_client, "hybrid_search"):
        # TODO: Clarify correct message - it seems to not be available in Python 3.9
        raise NotImplementedError(
            "Hybrid search is not available in this version of redis-py. "
            "Please upgrade to redis-py >= 7.1.0."
        )

    ft_query = query.query
    combine_method = query.combination_method

    # Only forward the post-processing config when it actually produces arguments.
    if query.postprocessing_config.build_args():
        post_processing = query.postprocessing_config
    else:
        post_processing = None

    results: HybridResult = search_client.hybrid_search(
        query=ft_query,
        combine_method=combine_method,
        post_processing=post_processing,
        **kwargs,
    )  # type: ignore
    return process_hybrid_results(results)

def batch_query(
self, queries: Sequence[BaseQuery], batch_size: int = 10
) -> List[List[Dict[str, Any]]]:
Expand Down Expand Up @@ -1824,6 +1891,59 @@ async def search(self, *args, **kwargs) -> "Result":
except Exception as e:
raise RedisSearchError(f"Unexpected error while searching: {str(e)}") from e

if REDIS_HYBRID_AVAILABLE:

async def hybrid_search(
    self, query: HybridQuery, **kwargs
) -> List[Dict[str, Any]]:
    """Asynchronously run a combined text + vector (hybrid) search against the index.

    Args:
        query: The hybrid query to execute. Its ``query``, ``combination_method``
            and ``postprocessing_config`` attributes are forwarded to redis-py.
        kwargs: Extra keyword arguments handed straight to the redis-py
            ``hybrid_search`` method (e.g. timeout).

    Returns:
        List[Dict[str, Any]]: The search results ordered by combined score unless
        otherwise specified.

    Raises:
        NotImplementedError: If the redis-py search client does not expose a
            ``hybrid_search`` method.

    Notes:
        Hybrid search is only available in Redis 8.4.0+, and requires redis-py >= 7.1.0.

    .. code-block:: python

        from redisvl.query.hybrid import HybridQuery

        hybrid_query = HybridQuery(
            text="lorem ipsum dolor sit amet",
            text_field_name="description",
            vector=[0.1, 0.2, 0.3],
            vector_field_name="embedding"
        )

        results = await index.hybrid_search(hybrid_query)

    """
    redis_client = await self._get_client()
    search_client = redis_client.ft(self.schema.index.name)

    # Guard: installed redis-py builds may predate the hybrid search API.
    if not hasattr(search_client, "hybrid_search"):
        # TODO: Clarify correct message - it seems to not be available in Python 3.9
        raise NotImplementedError(
            "Hybrid search is not available in this version of redis-py. "
            "Please upgrade to redis-py >= 7.1.0."
        )

    ft_query = query.query
    combine_method = query.combination_method

    # Only forward the post-processing config when it actually produces arguments.
    if query.postprocessing_config.build_args():
        post_processing = query.postprocessing_config
    else:
        post_processing = None

    results: HybridResult = await search_client.hybrid_search(
        query=ft_query,
        combine_method=combine_method,
        post_processing=post_processing,
        **kwargs,
    )  # type: ignore
    return process_hybrid_results(results)

async def batch_query(
self, queries: List[BaseQuery], batch_size: int = 10
) -> List[List[Dict[str, Any]]]:
Expand Down
138 changes: 26 additions & 112 deletions redisvl/query/aggregate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import warnings
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Union

from pydantic import BaseModel, field_validator, model_validator
from redis.commands.search.aggregation import AggregateRequest, Desc
Expand All @@ -8,7 +8,7 @@
from redisvl.query.filter import FilterExpression
from redisvl.redis.utils import array_to_buffer
from redisvl.schema.fields import VectorDataType
from redisvl.utils.token_escaper import TokenEscaper
from redisvl.utils.full_text_query_helper import FullTextQueryHelper
from redisvl.utils.utils import lazy_import

nltk = lazy_import("nltk")
Expand Down Expand Up @@ -124,7 +124,7 @@ def __init__(
num_results (int, optional): The number of results to return. Defaults to 10.
return_fields (Optional[List[str]], optional): The fields to return. Defaults to None.
stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the
provided text prior to searchuse. If a string such as "english" "german" is
provided text prior to search-use. If a string such as "english" "german" is
provided then a default set of stopwords for that language will be used. if a list,
set, or tuple of strings is provided then those will be used as stopwords.
Defaults to "english". if set to "None" then no stopwords will be removed.
Expand Down Expand Up @@ -159,8 +159,11 @@ def __init__(
self._alpha = alpha
self._dtype = dtype
self._num_results = num_results
self._set_stopwords(stopwords)
self._text_weights = self._parse_text_weights(text_weights)

self._ft_helper = FullTextQueryHelper(
stopwords=stopwords,
text_weights=text_weights,
)

query_string = self._build_query_string()
super().__init__(query_string)
Expand Down Expand Up @@ -198,115 +201,31 @@ def stopwords(self) -> Set[str]:
Returns:
Set[str]: The stopwords used in the query.
"""
return self._stopwords.copy() if self._stopwords else set()
return self._ft_helper.stopwords

def _set_stopwords(self, stopwords: Optional[Union[str, Set[str]]] = "english"):
"""Set the stopwords to use in the query.
Args:
stopwords (Optional[Union[str, Set[str]]]): The stopwords to use. If a string
such as "english" "german" is provided then a default set of stopwords for that
language will be used. if a list, set, or tuple of strings is provided then those
will be used as stopwords. Defaults to "english". if set to "None" then no stopwords
will be removed.

Raises:
TypeError: If the stopwords are not a set, list, or tuple of strings.
"""
if not stopwords:
self._stopwords = set()
elif isinstance(stopwords, str):
try:
nltk.download("stopwords", quiet=True)
self._stopwords = set(nltk_stopwords.words(stopwords))
except ImportError:
raise ValueError(
f"Loading stopwords for {stopwords} failed: nltk is not installed."
)
except Exception as e:
raise ValueError(f"Error trying to load {stopwords} from nltk. {e}")
elif isinstance(stopwords, (Set, List, Tuple)) and all( # type: ignore
isinstance(word, str) for word in stopwords
):
self._stopwords = set(stopwords)
else:
raise TypeError("stopwords must be a set, list, or tuple of strings")

def _tokenize_and_escape_query(self, user_query: str) -> str:
"""Convert a raw user query to a redis full text query joined by ORs
Args:
user_query (str): The user query to tokenize and escape.
@property
def text_weights(self) -> Dict[str, float]:
"""Get the text weights.

Returns:
str: The tokenized and escaped query string.

Raises:
ValueError: If the text string becomes empty after stopwords are removed.
Dictionary of word:weight mappings.
"""
escaper = TokenEscaper()

tokens = [
escaper.escape(
token.strip().strip(",").replace("“", "").replace("”", "").lower()
)
for token in user_query.split()
]

token_list = [
token for token in tokens if token and token not in self._stopwords
]
for i, token in enumerate(token_list):
if token in self._text_weights:
token_list[i] = f"{token}=>{{$weight:{self._text_weights[token]}}}"

if not token_list:
raise ValueError("text string cannot be empty after removing stopwords")
return " | ".join(token_list)

def _parse_text_weights(
self, weights: Optional[Dict[str, float]]
) -> Dict[str, float]:
parsed_weights: Dict[str, float] = {}
if not weights:
return parsed_weights
for word, weight in weights.items():
word = word.strip().lower()
if not word or " " in word:
raise ValueError(
f"Only individual words may be weighted. Got {{ {word}:{weight} }}"
)
if (
not (isinstance(weight, float) or isinstance(weight, int))
or weight < 0.0
):
raise ValueError(
f"Weights must be positive number. Got {{ {word}:{weight} }}"
)
parsed_weights[word] = weight
return parsed_weights
return self._ft_helper.text_weights

def set_text_weights(self, weights: Dict[str, float]):
"""Set or update the text weights for the query.

Args:
text_weights: Dictionary of word:weight mappings
weights: Dictionary of word:weight mappings
"""
self._text_weights = self._parse_text_weights(weights)
self._ft_helper.set_text_weights(weights)
self._query = self._build_query_string()

@property
def text_weights(self) -> Dict[str, float]:
"""Get the text weights.

Returns:
Dictionary of word:weight mappings.
"""
return self._text_weights

def _build_query_string(self) -> str:
"""Build the full query string for text search with optional filtering."""
filter_expression = self._filter_expression
if isinstance(self._filter_expression, FilterExpression):
filter_expression = str(self._filter_expression)
text = self._ft_helper.build_query_string(
self._text, self._text_field, self._filter_expression
)

# Build KNN query
knn_query = (
Expand All @@ -316,12 +235,7 @@ def _build_query_string(self) -> str:
# Add distance field alias
knn_query += f" AS {self.DISTANCE_ID}"

text = f"(~@{self._text_field}:({self._tokenize_and_escape_query(self._text)})"

if filter_expression and filter_expression != "*":
text += f" AND {filter_expression}"

return f"{text})=>[{knn_query}]"
return f"{text}=>[{knn_query}]"

def __str__(self) -> str:
"""Return the string representation of the query."""
Expand All @@ -332,16 +246,16 @@ class HybridQuery(AggregateHybridQuery):
"""Backward compatibility wrapper for AggregateHybridQuery.

.. deprecated::
HybridQuery is a backward compatibility wrapper around AggregateHybridQuery
and will eventually be replaced with a new hybrid query implementation.
To maintain current functionality please use AggregateHybridQuery directly.",
This class is deprecated and will be removed in a future version.
Please use the new HybridQuery from redisvl.query.hybrid instead.
For maintaining current AggregateHybridQuery functionality, use AggregateHybridQuery directly.
"""

def __init__(self, *args, **kwargs):
warnings.warn(
"HybridQuery is a backward compatibility wrapper around AggregateHybridQuery "
"and will eventually be replaced with a new hybrid query implementation. "
"To maintain current functionality please use AggregateHybridQuery directly.",
"This HybridQuery class is deprecated and will be removed in a future version. "
"Please use the new HybridQuery from redisvl.query.hybrid instead. "
"For maintaining current AggregateHybridQuery functionality, use AggregateHybridQuery directly.",
DeprecationWarning,
stacklevel=2,
)
Expand Down
Loading
Loading