From 4212f2d80713a802066bd7ed567fc3c990507bd7 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Sat, 29 Mar 2025 14:34:55 -0400 Subject: [PATCH 1/9] Add python 3.13 to version ranges --- .github/workflows/python-package.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index ab9ef2ef0..e57b17330 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -23,7 +23,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] services: redis: image: redis/redis-stack-server:latest diff --git a/pyproject.toml b/pyproject.toml index a46fbd191..ea3e75a8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ version = {attr = "chatterbot.__version__"} [project] name = "ChatterBot" -requires-python = ">=3.9,<3.13" +requires-python = ">=3.9,<3.14" urls = { Documentation = "https://docs.chatterbot.us", Repository = "https://github.com/gunthercox/ChatterBot", Changelog = "https://github.com/gunthercox/ChatterBot/releases" } description = "ChatterBot is a machine learning, conversational dialog engine" authors = [ From 76c509faa97d63a75168e8df85cebc32ad3c35af Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Wed, 18 Jun 2025 08:25:00 -0400 Subject: [PATCH 2/9] Bump langchain-redis version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ea3e75a8a..1f9f8d992 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ dev = [ ] redis = [ "redis[hiredis]<5.3", - "langchain-redis<=0.2.0", + "langchain-redis<=0.2.1", "langchain-huggingface<=0.1.2", "accelerate<=1.6.0", "sentence-transformers<=4.0.2", From a76ef0ab56d3178f0221164ae2e26afad8259368 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 06:23:27 -0400 Subject: [PATCH 3/9] 
Fix resource warning in tests by closing connection. I believe this also closes https://github.com/gunthercox/ChatterBot/issues/2217 by adding an explicit close() call to the storage adapter classes. --- chatterbot/storage/mongodb.py | 7 +++++++ chatterbot/storage/sql_storage.py | 8 ++++++++ chatterbot/storage/storage_adapter.py | 8 ++++++++ tests/base_case.py | 1 + tests/storage/test_sql_adapter.py | 9 +++++++++ 5 files changed, 33 insertions(+) diff --git a/chatterbot/storage/mongodb.py b/chatterbot/storage/mongodb.py index 0c1e60659..d00be1496 100644 --- a/chatterbot/storage/mongodb.py +++ b/chatterbot/storage/mongodb.py @@ -250,3 +250,10 @@ def drop(self): Remove the database. """ self.client.drop_database(self.database.name) + + def close(self): + """ + Close the MongoDB client connection. + """ + if hasattr(self, 'client'): + self.client.close() diff --git a/chatterbot/storage/sql_storage.py b/chatterbot/storage/sql_storage.py index ff05e7434..d5f532d3a 100644 --- a/chatterbot/storage/sql_storage.py +++ b/chatterbot/storage/sql_storage.py @@ -410,3 +410,11 @@ def create_database(self): """ from chatterbot.ext.sqlalchemy_app.models import Base Base.metadata.create_all(self.engine) + + def close(self): + """ + Close the database connection and dispose of the engine. + This ensures proper cleanup of resources. + """ + if hasattr(self, 'engine'): + self.engine.dispose() diff --git a/chatterbot/storage/storage_adapter.py b/chatterbot/storage/storage_adapter.py index 966f8b276..3f7b8c273 100644 --- a/chatterbot/storage/storage_adapter.py +++ b/chatterbot/storage/storage_adapter.py @@ -165,6 +165,14 @@ def drop(self): 'The `drop` method is not implemented by this adapter.' ) + def close(self): + """ + Close any open connections or sessions. + This method should be called when the storage adapter is no longer needed + to properly clean up resources and avoid resource warnings. 
+ """ + pass + class EmptyDatabaseException(Exception): def __init__(self, message=None): diff --git a/tests/base_case.py b/tests/base_case.py index 2e44b7b9b..ad2e88e08 100644 --- a/tests/base_case.py +++ b/tests/base_case.py @@ -85,6 +85,7 @@ def tearDown(self): Remove the test database. """ self.chatbot.storage.drop() + self.chatbot.storage.close() def assertIsLength(self, item, length): """ diff --git a/tests/storage/test_sql_adapter.py b/tests/storage/test_sql_adapter.py index 002008a9e..ab4375c5c 100644 --- a/tests/storage/test_sql_adapter.py +++ b/tests/storage/test_sql_adapter.py @@ -12,6 +12,13 @@ def setUpClass(cls): """ cls.adapter = SQLStorageAdapter(database_uri=None, raise_on_missing_search_text=False) + @classmethod + def tearDownClass(cls): + """ + Close the adapter connection after all tests are run. + """ + cls.adapter.close() + def tearDown(self): """ Drop the tables in the database after each test is run. @@ -24,10 +31,12 @@ class SQLStorageAdapterTests(SQLStorageAdapterTestCase): def test_set_database_uri_none(self): adapter = SQLStorageAdapter(database_uri=None) self.assertEqual(adapter.database_uri, 'sqlite://') + adapter.close() def test_set_database_uri(self): adapter = SQLStorageAdapter(database_uri='sqlite:///db.sqlite3') self.assertEqual(adapter.database_uri, 'sqlite:///db.sqlite3') + adapter.close() def test_count_returns_zero(self): """ From e988c573dea7092608dbec47296ae3966a0d2f74 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 06:25:53 -0400 Subject: [PATCH 4/9] Fix failing tests due to duplicate keys --- chatterbot/storage/redis.py | 56 ++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/chatterbot/storage/redis.py b/chatterbot/storage/redis.py index 3028cc9ef..5b92da5e5 100644 --- a/chatterbot/storage/redis.py +++ b/chatterbot/storage/redis.py @@ -1,27 +1,25 @@ from datetime import datetime +import re from chatterbot.storage import StorageAdapter from 
chatterbot.conversation import Statement as StatementObject -# TODO: This list may not be exhaustive. -# Is there a full list of characters reserved by redis? -REDIS_ESCAPE_CHARACTERS = { - '\\': '\\\\', - ':': '\\:', - '|': '\\|', - '%': '\\%', - '!': '\\!', - '-': '\\-', -} - -REDIS_TRANSLATION_TABLE = str.maketrans(REDIS_ESCAPE_CHARACTERS) - - def _escape_redis_special_characters(text): """ Escape special characters in a string that are used in redis queries. + + This function escapes characters that would interfere with the query syntax + used in the filter() method, specifically: + - Pipe (|) which is used as the OR operator when joining search terms + - Characters that could break the wildcard pattern matching """ - return text.translate(REDIS_TRANSLATION_TABLE) + from redisvl.query.filter import TokenEscaper + + # Remove space (last character) and add pipe + escape_pattern = TokenEscaper.DEFAULT_ESCAPED_CHARS.rstrip(' ]') + r'\|]' + + escaper = TokenEscaper(escape_chars_re=re.compile(escape_pattern)) + return escaper.escape(text) class RedisVectorStorageAdapter(StorageAdapter): @@ -284,13 +282,16 @@ def create( _default_date = datetime.now() + # Prevent duplicate tag entries in the database + unique_tags = list(set(tags)) if tags else [] + metadata = { 'text': text, 'category': kwargs.get('category', ''), # NOTE: `created_at` must have a valid numeric value or results will # not be returned for similarity_search for some reason 'created_at': kwargs.get('created_at') or int(_default_date.strftime('%y%m%d')), - 'tags': '|'.join(tags) if tags else '', + 'tags': '|'.join(unique_tags) if unique_tags else '', 'conversation': kwargs.get('conversation', ''), 'persona': kwargs.get('persona', ''), } @@ -298,7 +299,7 @@ def create( ids = self.vector_store.add_texts([in_response_to or ''], [metadata]) metadata['created_at'] = _default_date - metadata['tags'] = tags or [] + metadata['tags'] = unique_tags metadata.pop('text') statement = StatementObject( id=ids[0], @@ 
-320,7 +321,10 @@ def create_many(self, statements): 'conversation': statement.conversation or '', 'created_at': int(statement.created_at.strftime('%y%m%d')), 'persona': statement.persona or '', - 'tags': '|'.join(statement.tags) if statement.tags else '', + # Prevent duplicate tag entries in the database + 'tags': '|'.join( + list(set(statement.tags)) + ) if statement.tags else '', } ) for statement in statements ] @@ -334,12 +338,15 @@ def update(self, statement): Modifies an entry in the database. Creates an entry if one does not exist. """ + # Prevent duplicate tag entries in the database + unique_tags = list(set(statement.tags)) if statement.tags else [] + metadata = { 'text': statement.text, 'conversation': statement.conversation or '', 'created_at': int(statement.created_at.strftime('%y%m%d')), 'persona': statement.persona or '', - 'tags': '|'.join(statement.tags) if statement.tags else '', + 'tags': '|'.join(unique_tags) if unique_tags else '', } Document = self.get_statement_model() @@ -349,6 +356,9 @@ def update(self, statement): ) if statement.id: + # When updating with an existing ID, first delete the old entry + # to ensure a duplicate entry is not created + self.vector_store.delete(ids=[statement.id.split(':')[1]]) self.vector_store.add_texts( [document.page_content], [metadata], keys=[statement.id.split(':')[1]] ) @@ -389,3 +399,11 @@ def drop(self): # we want is to delete all the keys in the index, but # keep the index itself) # self.vector_store.index.delete(drop=True) + + def close(self): + """ + Close the Redis client connection. 
+ """ + if hasattr(self, 'vector_store') and hasattr(self.vector_store, 'index'): + if hasattr(self.vector_store.index, 'client'): + self.vector_store.index.client.close() From a92369057b0c69612c84aa4fc839830dbf1b08d1 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 07:57:59 -0400 Subject: [PATCH 5/9] Skip redis tests in Python 3.9 --- tests/storage/test_redis_adapter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/storage/test_redis_adapter.py b/tests/storage/test_redis_adapter.py index 2a91ee043..db77263cc 100644 --- a/tests/storage/test_redis_adapter.py +++ b/tests/storage/test_redis_adapter.py @@ -1,8 +1,14 @@ +import sys +import unittest from unittest import TestCase from chatterbot.conversation import Statement from chatterbot.storage.redis import RedisVectorStorageAdapter +@unittest.skipIf( + sys.version_info < (3, 10), + 'The Redis adapter requires Python 3.10+' +) class RedisStorageAdapterTestCase(TestCase): @classmethod From 15619fadbe1c330a9d345227e94a3a96d215c340 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 08:23:02 -0400 Subject: [PATCH 6/9] Use vector store directly --- chatterbot/storage/redis.py | 98 +++++++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 16 deletions(-) diff --git a/chatterbot/storage/redis.py b/chatterbot/storage/redis.py index 5b92da5e5..82fc89f11 100644 --- a/chatterbot/storage/redis.py +++ b/chatterbot/storage/redis.py @@ -1,4 +1,5 @@ from datetime import datetime +import json import re from chatterbot.storage import StorageAdapter from chatterbot.conversation import Statement as StatementObject @@ -156,7 +157,8 @@ def remove(self, statement): Removes any responses from statements where the response text matches the input text. 
""" - self.vector_store.delete(ids=[statement.id.split(':')[1]]) + client = self.vector_store.index.client + client.delete(statement.id) def filter(self, page_size=4, **kwargs): """ @@ -178,6 +180,7 @@ def filter(self, page_size=4, **kwargs): - search_in_response_to_contains - order_by """ + from redisvl.query import VectorQuery from redisvl.query.filter import Tag, Text # https://redis.io/docs/latest/develop/interact/search-and-query/advanced-concepts/query_syntax/ @@ -250,14 +253,56 @@ def filter(self, page_size=4, **kwargs): if 'search_in_response_to_contains' in kwargs: _search_text = kwargs.get('search_in_response_to_contains', '') - # TODO similarity_search_with_score - documents = self.vector_store.similarity_search( - _search_text, - k=page_size, # The number of results to return - return_all=True, # Include the full document with IDs - filter=filter_condition, - sort_by=ordering + # Get embedding for the search text + embedding = self.vector_store.embeddings.embed_query(_search_text) + + # Build return fields from metadata schema + return_fields = [ + 'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at' + ] + + # Use direct index query via RedisVL + # langchain's similarity_search has issues with filters in v0.2.4 + # and may not work properly with existing indexes + # TODO: Look into similarity_search_with_score implementation + query = VectorQuery( + vector=embedding, + vector_field_name='embedding', + return_fields=return_fields, + num_results=page_size, + filter_expression=filter_condition ) + + # Execute query + results = self.vector_store.index.query(query) + + # Convert results to Document objects + Document = self.get_statement_model() + documents = [] + for result in results: + # Extract metadata and content + in_response_to = result.get('in_response_to', '') + + # Convert created_at from integer (YYMMDD) to datetime + created_at_int = int(result.get('created_at', 0)) + if created_at_int: + created_at = 
datetime.strptime(str(created_at_int), '%y%m%d') + else: + created_at = datetime.now() + + metadata = { + 'text': result.get('text', ''), + 'conversation': result.get('conversation', ''), + 'persona': result.get('persona', ''), + 'tags': result.get('tags', ''), + 'created_at': created_at, + } + doc = Document( + page_content=in_response_to, + metadata=metadata, + id=result['id'] + ) + documents.append(doc) else: documents = self.vector_store.query_search( k=page_size, @@ -304,6 +349,7 @@ def create( statement = StatementObject( id=ids[0], text=text, + in_response_to=in_response_to, **metadata ) return statement @@ -358,9 +404,10 @@ def update(self, statement): if statement.id: # When updating with an existing ID, first delete the old entry # to ensure a duplicate entry is not created - self.vector_store.delete(ids=[statement.id.split(':')[1]]) + client = self.vector_store.index.client + client.delete(statement.id) self.vector_store.add_texts( - [document.page_content], [metadata], keys=[statement.id.split(':')[1]] + [document.page_content], [metadata], keys=[statement.id.split('::')[1]] ) else: self.vector_store.add_documents([document]) @@ -374,12 +421,31 @@ def get_random(self): random_key = client.randomkey() if random_key: - random_id = random_key.decode().split(':')[1] - - documents = self.vector_store.get_by_ids([random_id]) - - if documents: - return self.model_to_object(documents[0]) + # Get the hash data from Redis + data = client.hgetall(random_key) + + if data and b'_metadata_json' in data: + # Parse the metadata + metadata = json.loads(data[b'_metadata_json'].decode()) + + # Convert created_at from integer (YYMMDD) back to datetime + if 'created_at' in metadata and isinstance(metadata['created_at'], int): + created_at_str = str(metadata['created_at']) + # Parse YYMMDD format + metadata['created_at'] = datetime.strptime(created_at_str, '%y%m%d') + + # Get the in_response_to from the hash + in_response_to = data.get(b'in_response_to', b'').decode() + + # 
Create a Document-like object to use with model_to_object + Document = self.get_statement_model() + document = Document( + page_content=in_response_to if in_response_to else '', + metadata=metadata, + id=random_key.decode() + ) + + return self.model_to_object(document) raise self.EmptyDatabaseException() From d4ace8ed5d9afcf010ec1468a27b8bea17ea2547 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 08:28:05 -0400 Subject: [PATCH 7/9] Upgrade langchain-redis --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1f9f8d992..9b759405c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ dev = [ ] redis = [ "redis[hiredis]<5.3", - "langchain-redis<=0.2.1", + "langchain-redis<=0.2.5", "langchain-huggingface<=0.1.2", "accelerate<=1.6.0", "sentence-transformers<=4.0.2", From f0d41021f25742c5e46e2cc489ca385e6f619f67 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Tue, 14 Oct 2025 08:47:52 -0400 Subject: [PATCH 8/9] Resolve search and key issues --- chatterbot/storage/redis.py | 84 ++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/chatterbot/storage/redis.py b/chatterbot/storage/redis.py index 82fc89f11..923ec17e3 100644 --- a/chatterbot/storage/redis.py +++ b/chatterbot/storage/redis.py @@ -245,6 +245,71 @@ def filter(self, page_size=4, **kwargs): else: filter_condition = query + # Handle search_text parameter (used by BestMatch logic adapter) + # BestMatch uses search_text to find statements with matching indexed text. + # Since Redis doesn't store search_text as a field, we approximate this by: + # 1. Using the search_text value as a semantic query against in_response_to + # 2. 
This finds statements that are responses to similar inputs + # The effect is similar to BestMatch's Phase 2: finding alternate responses + if 'search_text' in kwargs: + _search_text = kwargs.get('search_text', '') + + # Use direct index query via RedisVL + # Search on the vectorized content (in_response_to) to find similar response patterns + from redisvl.query import VectorQuery + + # Get embedding for the search text + # Note: search_text may be indexed (e.g., "NOUN:cat VERB:run") so this + # approximates finding responses to semantically similar queries + embedding = self.vector_store.embeddings.embed_query(_search_text) + + # Build return fields from metadata schema + return_fields = [ + 'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at' + ] + + # Create vector query + query = VectorQuery( + vector=embedding, + vector_field_name='embedding', + return_fields=return_fields, + num_results=page_size, + filter_expression=filter_condition + ) + + # Execute query + results = self.vector_store.index.query(query) + + # Convert results to Document objects + Document = self.get_statement_model() + documents = [] + for result in results: + # Extract metadata and content + in_response_to = result.get('in_response_to', '') + + # Convert created_at from integer (YYMMDD) to datetime + created_at_int = int(result.get('created_at', 0)) + if created_at_int: + created_at = datetime.strptime(str(created_at_int), '%y%m%d') + else: + created_at = datetime.now() + + metadata = { + 'text': result.get('text', ''), + 'conversation': result.get('conversation', ''), + 'persona': result.get('persona', ''), + 'tags': result.get('tags', ''), + 'created_at': created_at, + } + doc = Document( + page_content=in_response_to, + metadata=metadata, + id=result['id'] + ) + documents.append(doc) + + return [self.model_to_object(document) for document in documents] + ordering = kwargs.get('order_by', None) if ordering: @@ -406,9 +471,24 @@ def update(self, statement): # to ensure 
a duplicate entry is not created client = self.vector_store.index.client client.delete(statement.id) - self.vector_store.add_texts( - [document.page_content], [metadata], keys=[statement.id.split('::')[1]] + + # Extract the key portion from the ID + # IDs have the format 'chatterbot::key' (:: is the standard delimiter) + # We need just 'key' for add_texts + key = statement.id.split('::')[1] + + # Note: langchain-redis has an inconsistency - it uses :: for auto-generated + # IDs but : (single colon) when keys are explicitly provided + ids = self.vector_store.add_texts( + [document.page_content], [metadata], keys=[key] ) + + # Normalize the ID to use :: delimiter (if langchain-redis returned single colon) + if ids and ':' in ids[0] and '::' not in ids[0]: + # Replace first occurrence of single colon with double colon + normalized_id = ids[0].replace(':', '::', 1) + # Update the key in Redis to use the correct format + client.rename(ids[0], normalized_id) else: self.vector_store.add_documents([document]) From e2615ae371ab1fc4c7bba854b47fbfccc7734c10 Mon Sep 17 00:00:00 2001 From: Gunther Cox Date: Wed, 15 Oct 2025 08:04:28 -0400 Subject: [PATCH 9/9] Handle differing key format cases --- chatterbot/storage/redis.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/chatterbot/storage/redis.py b/chatterbot/storage/redis.py index 923ec17e3..6764cb7b6 100644 --- a/chatterbot/storage/redis.py +++ b/chatterbot/storage/redis.py @@ -254,10 +254,6 @@ def filter(self, page_size=4, **kwargs): if 'search_text' in kwargs: _search_text = kwargs.get('search_text', '') - # Use direct index query via RedisVL - # Search on the vectorized content (in_response_to) to find similar response patterns - from redisvl.query import VectorQuery - # Get embedding for the search text # Note: search_text may be indexed (e.g., "NOUN:cat VERB:run") so this # approximates finding responses to semantically similar queries @@ -268,7 +264,8 @@ def filter(self, 
page_size=4, **kwargs): 'text', 'in_response_to', 'conversation', 'persona', 'tags', 'created_at' ] - # Create vector query + # Use direct index query via RedisVL + # Search on the vectorized content (in_response_to) to find similar response patterns query = VectorQuery( vector=embedding, vector_field_name='embedding', @@ -472,13 +469,16 @@ def update(self, statement): client = self.vector_store.index.client client.delete(statement.id) - # Extract the key portion from the ID - # IDs have the format 'chatterbot::key' (:: is the standard delimiter) - # We need just 'key' for add_texts - key = statement.id.split('::')[1] - - # Note: langchain-redis has an inconsistency - it uses :: for auto-generated + # NOTE: langchain-redis has an inconsistency - it uses :: for auto-generated # IDs but : (single colon) when keys are explicitly provided + if '::' in statement.id: + key = statement.id.split('::', 1)[1] + elif ':' in statement.id: + key = statement.id.split(':', 1)[1] + else: + # If no delimiter found, use the entire ID as the key + key = statement.id + ids = self.vector_store.add_texts( [document.page_content], [metadata], keys=[key] )