# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mock-based unit tests for the Cohere and OpenAI embedding providers.

The third-party SDK modules (``cohere`` / ``openai``) are replaced in
``sys.modules`` with mocks, so these tests run without API credentials and
complement the live integration tests.
"""

import sys
from unittest.mock import MagicMock, Mock, patch

import pytest

# The Cohere provider is not present on every branch (see #1305), so probe for
# it once and skip the Cohere test class when the import fails.
# ``ModuleNotFoundError`` is a subclass of ``ImportError``; catching the base
# class covers both.
try:
    import nemoguardrails.embeddings.providers.cohere  # noqa: F401
except ImportError:
    COHERE_AVAILABLE = False
else:
    COHERE_AVAILABLE = True


def _mock_cohere_sdk(embeddings=None):
    """Build a fake ``cohere`` module plus the client its ``Client()`` returns.

    Args:
        embeddings: Optional list of vectors. When given, ``client.embed(...)``
            returns a response whose ``embeddings`` attribute is this list.

    Returns:
        A ``(module_mock, client_mock)`` tuple.
    """
    sdk = MagicMock()
    client = Mock()
    sdk.Client.return_value = client
    if embeddings is not None:
        response = Mock()
        response.embeddings = embeddings
        client.embed.return_value = response
    return sdk, client


def _mock_openai_sdk(embeddings=None, version="1.0.0"):
    """Build a fake ``openai`` module plus the client its ``OpenAI()`` returns.

    Args:
        embeddings: Optional list of vectors. When given,
            ``client.embeddings.create(...)`` returns a response whose ``data``
            holds one record per vector, exposed as ``record.embedding``.
        version: Value assigned to the fake module's ``__version__``.

    Returns:
        A ``(module_mock, client_mock)`` tuple.
    """
    sdk = MagicMock()
    sdk.__version__ = version
    client = Mock()
    sdk.OpenAI.return_value = client
    if embeddings is not None:
        response = Mock()
        response.data = [Mock(embedding=vector) for vector in embeddings]
        client.embeddings.create.return_value = response
    return sdk, client


@pytest.mark.skipif(
    not COHERE_AVAILABLE, reason="Cohere provider not available in this branch"
)
class TestCohereEmbeddingModelMocked:
    """Exercise ``CohereEmbeddingModel`` against a mocked ``cohere`` SDK."""

    def test_init_with_known_model(self):
        mock_cohere, mock_client = _mock_cohere_sdk()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")

            assert model.model == "embed-multilingual-v3.0"
            assert model.embedding_size == 1024
            assert model.input_type == "search_document"
            assert model.client == mock_client
            mock_cohere.Client.assert_called_once()

    def test_init_with_custom_input_type(self):
        mock_cohere, _ = _mock_cohere_sdk()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel(
                "embed-english-v3.0", input_type="search_query"
            )

            assert model.model == "embed-english-v3.0"
            assert model.embedding_size == 1024
            assert model.input_type == "search_query"

    def test_init_with_unknown_model(self):
        # For a model name outside the predefined set, the test expects the
        # size to come from a single probe embedding of ["test"].
        mock_cohere, mock_client = _mock_cohere_sdk(embeddings=[[0.1] * 512])

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("custom-unknown-model")

            assert model.model == "custom-unknown-model"
            assert model.embedding_size == 512
            mock_client.embed.assert_called_once_with(
                texts=["test"],
                model="custom-unknown-model",
                input_type="search_document",
            )

    def test_import_error_when_cohere_not_installed(self):
        # A ``None`` entry in sys.modules makes ``import cohere`` fail.
        with patch.dict("sys.modules", {"cohere": None}):
            with pytest.raises(ImportError, match="Could not import cohere"):
                # Evict any cached provider module so it is imported afresh
                # while ``cohere`` is blocked; patch.dict restores sys.modules
                # when the outer block exits.
                sys.modules.pop("nemoguardrails.embeddings.providers.cohere", None)

                from nemoguardrails.embeddings.providers.cohere import (
                    CohereEmbeddingModel,
                )

                CohereEmbeddingModel("embed-v4.0")

    def test_encode_success(self):
        expected_embeddings = [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6],
        ]
        mock_cohere, mock_client = _mock_cohere_sdk(embeddings=expected_embeddings)

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-english-light-v3.0")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_with(
                texts=documents,
                model="embed-english-light-v3.0",
                input_type="search_document",
            )

    def test_encode_with_custom_input_type(self):
        expected_embeddings = [[0.1, 0.2]]
        mock_cohere, mock_client = _mock_cohere_sdk(embeddings=expected_embeddings)

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-v4.0", input_type="classification")
            documents = ["classify this"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_with(
                texts=documents, model="embed-v4.0", input_type="classification"
            )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        expected_embeddings = [[0.1, 0.2, 0.3]]
        mock_cohere, mock_client = _mock_cohere_sdk(embeddings=expected_embeddings)

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")
            documents = ["async test"]
            result = await model.encode_async(documents)

            assert result == expected_embeddings
            mock_client.embed.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        mock_cohere, _ = _mock_cohere_sdk()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            # Only the constructor's call into the SDK is under test, so the
            # instance itself is not kept.
            CohereEmbeddingModel("embed-v4.0", api_key="test-key-123")

            mock_cohere.Client.assert_called_once_with(api_key="test-key-123")

    # One test case per model, so a wrong size for one model does not hide
    # the results for the others.
    @pytest.mark.parametrize(
        ("model_name", "expected_size"),
        [
            ("embed-v4.0", 1536),
            ("embed-english-v3.0", 1024),
            ("embed-english-light-v3.0", 384),
            ("embed-multilingual-v3.0", 1024),
            ("embed-multilingual-light-v3.0", 384),
        ],
    )
    def test_all_predefined_models(self, model_name, expected_size):
        mock_cohere, _ = _mock_cohere_sdk()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel(model_name)

            assert model.embedding_size == expected_size
            assert model.model == model_name


class TestOpenAIEmbeddingModelMocked:
    """Exercise ``OpenAIEmbeddingModel`` against a mocked ``openai`` SDK."""

    def test_init_with_known_model(self):
        mock_openai, mock_client = _mock_openai_sdk()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")

            assert model.model == "text-embedding-3-small"
            assert model.embedding_size == 1536
            assert model.client == mock_client
            mock_openai.OpenAI.assert_called_once()

    def test_init_with_unknown_model(self):
        # For a model name outside the predefined set, the test expects the
        # size to come from a single probe embedding of ["test"].
        mock_openai, mock_client = _mock_openai_sdk(embeddings=[[0.1] * 2048])

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("custom-unknown-model")

            assert model.model == "custom-unknown-model"
            assert model.embedding_size == 2048
            mock_client.embeddings.create.assert_called_once_with(
                input=["test"], model="custom-unknown-model"
            )

    def test_import_error_when_openai_not_installed(self):
        # A ``None`` entry in sys.modules makes ``import openai`` fail.
        with patch.dict("sys.modules", {"openai": None}):
            with pytest.raises(ImportError, match="Could not import openai"):
                # Evict any cached provider module so it is imported afresh
                # while ``openai`` is blocked; patch.dict restores sys.modules
                # when the outer block exits.
                sys.modules.pop("nemoguardrails.embeddings.providers.openai", None)

                from nemoguardrails.embeddings.providers.openai import (
                    OpenAIEmbeddingModel,
                )

                OpenAIEmbeddingModel("text-embedding-3-small")

    def test_old_version_error(self):
        mock_openai, _ = _mock_openai_sdk(version="0.28.0")

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            with pytest.raises(RuntimeError, match="openai<1.0.0"):
                OpenAIEmbeddingModel("text-embedding-3-small")

    def test_encode_success(self):
        expected_embeddings = [
            [0.1, 0.2, 0.3],
            [0.4, 0.5, 0.6],
        ]
        mock_openai, mock_client = _mock_openai_sdk(embeddings=expected_embeddings)

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-ada-002")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

            assert result == expected_embeddings
            mock_client.embeddings.create.assert_called_with(
                input=documents, model="text-embedding-ada-002"
            )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        expected_embeddings = [[0.1, 0.2, 0.3]]
        mock_openai, mock_client = _mock_openai_sdk(embeddings=expected_embeddings)

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")
            documents = ["async test"]
            result = await model.encode_async(documents)

            assert result == expected_embeddings
            mock_client.embeddings.create.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        mock_openai, _ = _mock_openai_sdk()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            # Only the constructor's call into the SDK is under test, so the
            # instance itself is not kept.
            OpenAIEmbeddingModel("text-embedding-3-small", api_key="test-key-123")

            mock_openai.OpenAI.assert_called_once_with(api_key="test-key-123")

    # One test case per model, so a wrong size for one model does not hide
    # the results for the others.
    @pytest.mark.parametrize(
        ("model_name", "expected_size"),
        [
            ("text-embedding-ada-002", 1536),
            ("text-embedding-3-small", 1536),
            ("text-embedding-3-large", 3072),
        ],
    )
    def test_all_predefined_models(self, model_name, expected_size):
        mock_openai, _ = _mock_openai_sdk()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel(model_name)

            assert model.embedding_size == expected_size
            assert model.model == model_name