Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
344 changes: 344 additions & 0 deletions tests/test_embeddings_providers_mock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,344 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from unittest.mock import MagicMock, Mock, patch

import pytest

# Probe once, at import time, for the Cohere provider module. The result gates
# the Cohere test class below through ``pytest.mark.skipif``.
try:
    import nemoguardrails.embeddings.providers.cohere  # noqa: F401
except ImportError:
    # ModuleNotFoundError is a subclass of ImportError, so this single clause
    # covers both "module does not exist" and "module failed to import".
    COHERE_AVAILABLE = False
else:
    COHERE_AVAILABLE = True


@pytest.mark.skipif(
    not COHERE_AVAILABLE, reason="Cohere provider not available in this branch"
)
class TestCohereEmbeddingModelMocked:
    """Tests for ``CohereEmbeddingModel`` using a mocked ``cohere`` package.

    Each test places a mock (or ``None``) under ``sys.modules["cohere"]`` with
    ``patch.dict`` while the model is built and used, so any ``import cohere``
    executed inside that window resolves to the test double.
    """

    @staticmethod
    def _mock_cohere(embeddings=None):
        """Build a fake ``cohere`` module and the client ``Client()`` returns.

        Args:
            embeddings: Optional list of vectors exposed as
                ``client.embed(...).embeddings``. When ``None``, the return
                value of ``embed`` is left as an auto-generated mock.

        Returns:
            A ``(mock_cohere, mock_client)`` tuple.
        """
        mock_cohere = MagicMock()
        mock_client = Mock()
        mock_cohere.Client.return_value = mock_client
        if embeddings is not None:
            mock_response = Mock()
            mock_response.embeddings = embeddings
            mock_client.embed.return_value = mock_response
        return mock_cohere, mock_client

    def test_init_with_known_model(self):
        """A predefined model sets its attributes and creates one client."""
        mock_cohere, mock_client = self._mock_cohere()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")

        assert model.model == "embed-multilingual-v3.0"
        assert model.embedding_size == 1024
        assert model.input_type == "search_document"
        assert model.client == mock_client
        mock_cohere.Client.assert_called_once()

    def test_init_with_custom_input_type(self):
        """An explicit ``input_type`` overrides the ``search_document`` default."""
        mock_cohere, _ = self._mock_cohere()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel(
                "embed-english-v3.0", input_type="search_query"
            )

        assert model.model == "embed-english-v3.0"
        assert model.embedding_size == 1024
        assert model.input_type == "search_query"

    def test_init_with_unknown_model(self):
        """An unlisted model takes its size from one probe ``embed`` call."""
        # The probe response carries a single 512-dimensional vector; the
        # model is expected to report that length as its embedding size.
        mock_cohere, mock_client = self._mock_cohere(embeddings=[[0.1] * 512])

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("custom-unknown-model")

        assert model.model == "custom-unknown-model"
        assert model.embedding_size == 512
        mock_client.embed.assert_called_once_with(
            texts=["test"],
            model="custom-unknown-model",
            input_type="search_document",
        )

    def test_import_error_when_cohere_not_installed(self):
        """A missing ``cohere`` package surfaces as a descriptive ImportError."""
        provider_module = "nemoguardrails.embeddings.providers.cohere"

        # Mapping a name to None in sys.modules makes ``import cohere`` raise
        # ImportError; patch.dict restores sys.modules when the block exits.
        with patch.dict("sys.modules", {"cohere": None}), pytest.raises(
            ImportError, match="Could not import cohere"
        ):
            # Drop any cached provider module so the import below re-executes it.
            sys.modules.pop(provider_module, None)

            from nemoguardrails.embeddings.providers.cohere import (
                CohereEmbeddingModel,
            )

            CohereEmbeddingModel("embed-v4.0")

    def test_encode_success(self):
        """``encode`` returns the client's embeddings and forwards its arguments."""
        mock_cohere, mock_client = self._mock_cohere(
            embeddings=[
                [0.1, 0.2, 0.3],
                [0.4, 0.5, 0.6],
            ]
        )

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-english-light-v3.0")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

        assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        mock_client.embed.assert_called_with(
            texts=documents,
            model="embed-english-light-v3.0",
            input_type="search_document",
        )

    def test_encode_with_custom_input_type(self):
        """``encode`` passes the configured ``input_type`` through to ``embed``."""
        mock_cohere, mock_client = self._mock_cohere(embeddings=[[0.1, 0.2]])

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-v4.0", input_type="classification")
            documents = ["classify this"]
            result = model.encode(documents)

        assert result == [[0.1, 0.2]]
        mock_client.embed.assert_called_with(
            texts=documents, model="embed-v4.0", input_type="classification"
        )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        """``encode_async`` yields the embeddings from a single ``embed`` call."""
        mock_cohere, mock_client = self._mock_cohere(embeddings=[[0.1, 0.2, 0.3]])

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            model = CohereEmbeddingModel("embed-multilingual-v3.0")
            documents = ["async test"]
            result = await model.encode_async(documents)

        assert result == [[0.1, 0.2, 0.3]]
        mock_client.embed.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        """Extra keyword arguments such as ``api_key`` reach ``cohere.Client``."""
        mock_cohere, _ = self._mock_cohere()

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            # Only the constructor's side effect matters; the instance is unused.
            CohereEmbeddingModel("embed-v4.0", api_key="test-key-123")

        mock_cohere.Client.assert_called_once_with(api_key="test-key-123")

    def test_all_predefined_models(self):
        """Every predefined model name reports its expected embedding size."""
        mock_cohere, _ = self._mock_cohere()

        models_to_test = {
            "embed-v4.0": 1536,
            "embed-english-v3.0": 1024,
            "embed-english-light-v3.0": 384,
            "embed-multilingual-v3.0": 1024,
            "embed-multilingual-light-v3.0": 384,
        }

        with patch.dict("sys.modules", {"cohere": mock_cohere}):
            from nemoguardrails.embeddings.providers.cohere import CohereEmbeddingModel

            for model_name, expected_size in models_to_test.items():
                model = CohereEmbeddingModel(model_name)
                # The model name is the assertion message so a failure
                # identifies which table entry is wrong.
                assert model.embedding_size == expected_size, model_name
                assert model.model == model_name, model_name


class TestOpenAIEmbeddingModelMocked:
    """Tests for ``OpenAIEmbeddingModel`` using a mocked ``openai`` package.

    Each test places a mock (or ``None``) under ``sys.modules["openai"]`` with
    ``patch.dict`` while the model is built and used, so any ``import openai``
    executed inside that window resolves to the test double.
    """

    @staticmethod
    def _mock_openai(version="1.0.0", vectors=None):
        """Build a fake ``openai`` module and the client ``OpenAI()`` returns.

        Args:
            version: Value exposed as ``openai.__version__``.
            vectors: Optional list of vectors; each becomes the ``embedding``
                attribute of one record in
                ``client.embeddings.create(...).data``. When ``None``, the
                return value of ``create`` is left as an auto-generated mock.

        Returns:
            A ``(mock_openai, mock_client)`` tuple.
        """
        mock_openai = MagicMock()
        mock_openai.__version__ = version
        mock_client = Mock()
        mock_openai.OpenAI.return_value = mock_client
        if vectors is not None:
            mock_response = Mock()
            mock_response.data = [Mock(embedding=vector) for vector in vectors]
            mock_client.embeddings.create.return_value = mock_response
        return mock_openai, mock_client

    def test_init_with_known_model(self):
        """A predefined model sets its attributes and creates one client."""
        mock_openai, mock_client = self._mock_openai()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")

        assert model.model == "text-embedding-3-small"
        assert model.embedding_size == 1536
        assert model.client == mock_client
        mock_openai.OpenAI.assert_called_once()

    def test_init_with_unknown_model(self):
        """An unlisted model takes its size from one probe ``create`` call."""
        # The probe response carries a single 2048-dimensional vector; the
        # model is expected to report that length as its embedding size.
        mock_openai, mock_client = self._mock_openai(vectors=[[0.1] * 2048])

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("custom-unknown-model")

        assert model.model == "custom-unknown-model"
        assert model.embedding_size == 2048
        mock_client.embeddings.create.assert_called_once_with(
            input=["test"], model="custom-unknown-model"
        )

    def test_import_error_when_openai_not_installed(self):
        """A missing ``openai`` package surfaces as a descriptive ImportError."""
        provider_module = "nemoguardrails.embeddings.providers.openai"

        # Mapping a name to None in sys.modules makes ``import openai`` raise
        # ImportError; patch.dict restores sys.modules when the block exits.
        with patch.dict("sys.modules", {"openai": None}), pytest.raises(
            ImportError, match="Could not import openai"
        ):
            # Drop any cached provider module so the import below re-executes it.
            sys.modules.pop(provider_module, None)

            from nemoguardrails.embeddings.providers.openai import (
                OpenAIEmbeddingModel,
            )

            OpenAIEmbeddingModel("text-embedding-3-small")

    def test_old_version_error(self):
        """A pre-1.0 ``openai.__version__`` makes construction raise RuntimeError."""
        mock_openai, _ = self._mock_openai(version="0.28.0")

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            with pytest.raises(RuntimeError, match="openai<1.0.0"):
                OpenAIEmbeddingModel("text-embedding-3-small")

    def test_encode_success(self):
        """``encode`` returns each record's embedding and forwards its arguments."""
        mock_openai, mock_client = self._mock_openai(
            vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        )

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-ada-002")
            documents = ["hello world", "test document"]
            result = model.encode(documents)

        assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        mock_client.embeddings.create.assert_called_with(
            input=documents, model="text-embedding-ada-002"
        )

    @pytest.mark.asyncio
    async def test_encode_async_success(self):
        """``encode_async`` yields the embeddings from a single ``create`` call."""
        mock_openai, mock_client = self._mock_openai(vectors=[[0.1, 0.2, 0.3]])

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            model = OpenAIEmbeddingModel("text-embedding-3-small")
            documents = ["async test"]
            result = await model.encode_async(documents)

        assert result == [[0.1, 0.2, 0.3]]
        mock_client.embeddings.create.assert_called_once()

    def test_init_with_api_key_kwarg(self):
        """Extra keyword arguments such as ``api_key`` reach ``openai.OpenAI``."""
        mock_openai, _ = self._mock_openai()

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            # Only the constructor's side effect matters; the instance is unused.
            OpenAIEmbeddingModel("text-embedding-3-small", api_key="test-key-123")

        mock_openai.OpenAI.assert_called_once_with(api_key="test-key-123")

    def test_all_predefined_models(self):
        """Every predefined model name reports its expected embedding size."""
        mock_openai, _ = self._mock_openai()

        models_to_test = {
            "text-embedding-ada-002": 1536,
            "text-embedding-3-small": 1536,
            "text-embedding-3-large": 3072,
        }

        with patch.dict("sys.modules", {"openai": mock_openai}):
            from nemoguardrails.embeddings.providers.openai import OpenAIEmbeddingModel

            for model_name, expected_size in models_to_test.items():
                model = OpenAIEmbeddingModel(model_name)
                # The model name is the assertion message so a failure
                # identifies which table entry is wrong.
                assert model.embedding_size == expected_size, model_name
                assert model.model == model_name, model_name