diff --git a/.github/workflows/testing-integration.yaml b/.github/workflows/testing-integration.yaml index 8275a5f1..53023bde 100644 --- a/.github/workflows/testing-integration.yaml +++ b/.github/workflows/testing-integration.yaml @@ -3,6 +3,31 @@ name: "Integration Tests" workflow_call: {} jobs: + reorg: + name: Reorg tests + runs-on: ubuntu-latest + env: + PINECONE_DEBUG_CURL: 'true' + PINECONE_API_KEY: '${{ secrets.PINECONE_API_KEY }}' + PINECONE_ADDITIONAL_HEADERS: '{"sdk-test-suite": "pinecone-python-client"}' + strategy: + matrix: + python_version: [3.9, 3.12] + steps: + - uses: actions/checkout@v4 + - name: 'Set up Python ${{ matrix.python_version }}' + uses: actions/setup-python@v5 + with: + python-version: '${{ matrix.python_version }}' + - name: Setup Poetry + uses: ./.github/actions/setup-poetry + with: + include_asyncio: true + - name: 'Run index tests' + run: poetry run pytest tests/integration/control/index --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + - name: 'Run collection tests' + run: poetry run pytest tests/integration/control/collections --retries 5 --retry-delay 35 -s -vv --log-cli-level=DEBUG + inference: name: Inference tests diff --git a/pinecone/__init__.py b/pinecone/__init__.py index 13a65bd1..b2c0f5c8 100644 --- a/pinecone/__init__.py +++ b/pinecone/__init__.py @@ -2,22 +2,143 @@ .. include:: ../pdoc/README.md """ -from .deprecated_plugins import check_for_deprecated_plugins +from .deprecated_plugins import check_for_deprecated_plugins as _check_for_deprecated_plugins from .deprecation_warnings import * -from .config import * +from .pinecone import Pinecone +from .pinecone_asyncio import PineconeAsyncio from .exceptions import * -from .control import * -from .data import * -from .models import * -from .enums import * from .utils import __version__ import logging +# Set up lazy import handling +from .utils.lazy_imports import setup_lazy_imports as _setup_lazy_imports + +_inference_lazy_imports = { + "RerankModel": ("pinecone.inference", "RerankModel"), + "EmbedModel": ("pinecone.inference", "EmbedModel"), +} + +_db_data_lazy_imports = { + "Vector": ("pinecone.db_data.dataclasses", "Vector"), + "SparseValues": ("pinecone.db_data.dataclasses", "SparseValues"), + "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"), + "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"), + "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"), + "FetchResponse": ("pinecone.db_data.dataclasses", "FetchResponse"), + "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"), + "DescribeIndexStatsRequest": ("pinecone.db_data.models", "DescribeIndexStatsRequest"), + "DescribeIndexStatsResponse": ("pinecone.db_data.models", "IndexDescription"), + "RpcStatus": ("pinecone.db_data.models", "RpcStatus"), + "ScoredVector": ("pinecone.db_data.models", "ScoredVector"), + "SingleQueryResults": ("pinecone.db_data.models", "SingleQueryResults"), + "QueryRequest": ("pinecone.db_data.models", "QueryRequest"), + "QueryResponse": ("pinecone.db_data.models", "QueryResponse"), + "UpsertResponse": ("pinecone.db_data.models", "UpsertResponse"), + "UpdateRequest": ("pinecone.db_data.models", "UpdateRequest"), + "ImportErrorMode": ("pinecone.core.openapi.db_data.models", "ImportErrorMode"), + "VectorDictionaryMissingKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryMissingKeysError", + ), + "VectorDictionaryExcessKeysError": ( + "pinecone.db_data.errors", + "VectorDictionaryExcessKeysError", + ), + "VectorTupleLengthError": 
("pinecone.db_data.errors", "VectorTupleLengthError"), + "SparseValuesTypeError": ("pinecone.db_data.errors", "SparseValuesTypeError"), + "SparseValuesMissingKeysError": ("pinecone.db_data.errors", "SparseValuesMissingKeysError"), + "SparseValuesDictionaryExpectedError": ( + "pinecone.db_data.errors", + "SparseValuesDictionaryExpectedError", + ), + "Index": ("pinecone.db_data.import_error", "Index"), + "Inference": ("pinecone.db_data.import_error", "Inference"), +} + +_db_control_lazy_imports = { + "CloudProvider": ("pinecone.db_control.enums", "CloudProvider"), + "AwsRegion": ("pinecone.db_control.enums", "AwsRegion"), + "GcpRegion": ("pinecone.db_control.enums", "GcpRegion"), + "AzureRegion": ("pinecone.db_control.enums", "AzureRegion"), + "PodIndexEnvironment": ("pinecone.db_control.enums", "PodIndexEnvironment"), + "Metric": ("pinecone.db_control.enums", "Metric"), + "VectorType": ("pinecone.db_control.enums", "VectorType"), + "DeletionProtection": ("pinecone.db_control.enums", "DeletionProtection"), + "CollectionDescription": ("pinecone.db_control.models", "CollectionDescription"), + "CollectionList": ("pinecone.db_control.models", "CollectionList"), + "IndexList": ("pinecone.db_control.models", "IndexList"), + "IndexModel": ("pinecone.db_control.models", "IndexModel"), + "IndexEmbed": ("pinecone.db_control.models", "IndexEmbed"), + "ServerlessSpec": ("pinecone.db_control.models", "ServerlessSpec"), + "ServerlessSpecDefinition": ("pinecone.db_control.models", "ServerlessSpecDefinition"), + "PodSpec": ("pinecone.db_control.models", "PodSpec"), + "PodSpecDefinition": ("pinecone.db_control.models", "PodSpecDefinition"), + "PodType": ("pinecone.db_control.enums", "PodType"), +} + +_config_lazy_imports = { + "Config": ("pinecone.config", "Config"), + "ConfigBuilder": ("pinecone.config", "ConfigBuilder"), + "PineconeConfig": ("pinecone.config", "PineconeConfig"), +} + +# Define imports to be lazily loaded +_LAZY_IMPORTS = { + **_inference_lazy_imports, + **_db_data_lazy_imports, + **_db_control_lazy_imports, + **_config_lazy_imports, +} + +# Set up the lazy import handler +_setup_lazy_imports(_LAZY_IMPORTS) + # Raise an exception if the user is attempting to use the SDK with # deprecated plugins installed in their project. 
-check_for_deprecated_plugins() +_check_for_deprecated_plugins() # Silence annoying log messages from the plugin interface logging.getLogger("pinecone_plugin_interface").setLevel(logging.CRITICAL) + +__all__ = [ + "__version__", + # Deprecated top-level functions + "init", + "create_index", + "delete_index", + "list_indexes", + "describe_index", + "configure_index", + "scale_index", + "create_collection", + "delete_collection", + "describe_collection", + "list_collections", + # Primary client classes + "Pinecone", + "PineconeAsyncio", + # All lazy-loaded types + *list(_LAZY_IMPORTS.keys()), + # Exception classes + "PineconeException", + "PineconeApiException", + "PineconeConfigurationError", + "PineconeProtocolError", + "PineconeApiAttributeError", + "PineconeApiTypeError", + "PineconeApiValueError", + "PineconeApiKeyError", + "NotFoundException", + "UnauthorizedException", + "ForbiddenException", + "ServiceException", + "ListConversionException", + "VectorDictionaryMissingKeysError", + "VectorDictionaryExcessKeysError", + "VectorTupleLengthError", + "SparseValuesTypeError", + "SparseValuesMissingKeysError", + "SparseValuesDictionaryExpectedError", +] diff --git a/pinecone/__init__.pyi b/pinecone/__init__.pyi new file mode 100644 index 00000000..249fe9d8 --- /dev/null +++ b/pinecone/__init__.pyi @@ -0,0 +1,118 @@ +from pinecone.config import Config +from pinecone.config import ConfigBuilder +from pinecone.config import PineconeConfig +from pinecone.inference import RerankModel +from pinecone.inference import EmbedModel +from pinecone.db_data.dataclasses import ( + Vector, + SparseValues, + SearchQuery, + SearchQueryVector, + SearchRerank, +) +from pinecone.db_data.models import ( + FetchResponse, + DeleteRequest, + DescribeIndexStatsRequest, + IndexDescription as DescribeIndexStatsResponse, + RpcStatus, + ScoredVector, + SingleQueryResults, + QueryRequest, + QueryResponse, + UpsertResponse, + UpdateRequest, +) +from pinecone.core.openapi.db_data.models import ImportErrorMode +from pinecone.db_data.errors import ( + VectorDictionaryMissingKeysError, + VectorDictionaryExcessKeysError, + VectorTupleLengthError, + SparseValuesTypeError, + SparseValuesMissingKeysError, + SparseValuesDictionaryExpectedError, +) +from pinecone.db_control.enums import ( + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + PodIndexEnvironment, + Metric, + VectorType, + DeletionProtection, + PodType, +) +from pinecone.db_control.models import ( + CollectionDescription, + CollectionList, + IndexList, + IndexModel, + IndexEmbed, + ServerlessSpec, + ServerlessSpecDefinition, + PodSpec, + PodSpecDefinition, +) +from pinecone.pinecone import Pinecone +from pinecone.pinecone_asyncio import PineconeAsyncio + +# Re-export all the types +__all__ = [ + # Primary client classes + "Pinecone", + "PineconeAsyncio", + # Config classes + "Config", + "ConfigBuilder", + "PineconeConfig", + # Inference classes + "RerankModel", + "EmbedModel", + # Data classes + "Vector", + "SparseValues", + "SearchQuery", + "SearchQueryVector", + "SearchRerank", + # Model classes + "FetchResponse", + "DeleteRequest", + "DescribeIndexStatsRequest", + "DescribeIndexStatsResponse", + "RpcStatus", + "ScoredVector", + "SingleQueryResults", + "QueryRequest", + "QueryResponse", + "UpsertResponse", + "UpdateRequest", + "ImportErrorMode", + # Error classes + "VectorDictionaryMissingKeysError", + "VectorDictionaryExcessKeysError", + "VectorTupleLengthError", + "SparseValuesTypeError", + "SparseValuesMissingKeysError", +
"SparseValuesDictionaryExpectedError", + # Control plane enums + "CloudProvider", + "AwsRegion", + "GcpRegion", + "AzureRegion", + "PodIndexEnvironment", + "Metric", + "VectorType", + "DeletionProtection", + "PodType", + # Control plane models + "CollectionDescription", + "CollectionList", + "IndexList", + "IndexModel", + "IndexEmbed", + "ServerlessSpec", + "ServerlessSpecDefinition", + "PodSpec", + "PodSpecDefinition", +] diff --git a/pinecone/config/__init__.py b/pinecone/config/__init__.py index 7abb7278..f292622f 100644 --- a/pinecone/config/__init__.py +++ b/pinecone/config/__init__.py @@ -2,6 +2,7 @@ import os from .config import ConfigBuilder, Config +from .openapi_configuration import Configuration as OpenApiConfiguration from .pinecone_config import PineconeConfig if os.getenv("PINECONE_DEBUG") is not None: diff --git a/pinecone/config/config.py b/pinecone/config/config.py index 01a703e0..9029c45a 100644 --- a/pinecone/config/config.py +++ b/pinecone/config/config.py @@ -1,9 +1,11 @@ -from typing import NamedTuple, Optional, Dict +from typing import NamedTuple, Optional, Dict, TYPE_CHECKING import os -from pinecone.exceptions.exceptions import PineconeConfigurationError -from pinecone.config.openapi import OpenApiConfigFactory -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.exceptions import PineconeConfigurationError +from pinecone.config.openapi_config_factory import OpenApiConfigFactory + +if TYPE_CHECKING: + from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration # Duplicated this util to help resolve circular imports @@ -81,8 +83,8 @@ def build( @staticmethod def build_openapi_config( - config: Config, openapi_config: Optional[OpenApiConfiguration] = None, **kwargs - ) -> OpenApiConfiguration: + config: Config, openapi_config: Optional["OpenApiConfiguration"] = None, **kwargs + ) -> "OpenApiConfiguration": if openapi_config: openapi_config = OpenApiConfigFactory.copy( openapi_config=openapi_config, api_key=config.api_key, host=config.host diff --git a/pinecone/config/openapi.py b/pinecone/config/openapi_config_factory.py similarity index 93% rename from pinecone/config/openapi.py rename to pinecone/config/openapi_config_factory.py index d6bdf702..56a1de64 100644 --- a/pinecone/config/openapi.py +++ b/pinecone/config/openapi_config_factory.py @@ -1,13 +1,11 @@ import sys -from typing import List, Optional +from typing import List, Optional, Tuple import certifi import socket import copy -from urllib3.connection import HTTPConnection - -from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration +from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration TCP_KEEPINTVL = 60 # Sec TCP_KEEPIDLE = 300 # Sec @@ -58,7 +56,7 @@ def _get_socket_options( keep_alive_idle_sec: int = TCP_KEEPIDLE, keep_alive_interval_sec: int = TCP_KEEPINTVL, keep_alive_tries: int = TCP_KEEPCNT, - ) -> List[tuple]: + ) -> List[Tuple[int, int, int]]: """ Returns the socket options to pass to OpenAPI's Rest client Args: @@ -72,7 +70,8 @@ def _get_socket_options( """ # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives - socket_params = HTTPConnection.default_socket_options + # urllib3.connection.HTTPConnection.default_socket_options + socket_params = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] if not do_keep_alive: return socket_params diff --git a/pinecone/config/openapi_configuration.py 
b/pinecone/config/openapi_configuration.py new file mode 100644 index 00000000..fce6defc --- /dev/null +++ b/pinecone/config/openapi_configuration.py @@ -0,0 +1,441 @@ +import copy +import logging +import multiprocessing + +from http import client as http_client +from pinecone.exceptions import PineconeApiValueError +from typing import TypedDict + + +class HostSetting(TypedDict): + url: str + description: str + + +JSON_SCHEMA_VALIDATION_KEYWORDS = { + "multipleOf", + "maximum", + "exclusiveMaximum", + "minimum", + "exclusiveMinimum", + "maxLength", + "minLength", + "pattern", + "maxItems", + "minItems", +} + + +class Configuration: + """Class to hold the configuration of the API client. + + :param host: Base url + :param api_key: Dict to store API key(s). + Each entry in the dict specifies an API key. + The dict key is the name of the security scheme in the OAS specification. + The dict value is the API key secret. + :param api_key_prefix: Dict to store API prefix (e.g. Bearer) + The dict key is the name of the security scheme in the OAS specification. + The dict value is an API key prefix when generating the auth data. + :param discard_unknown_keys: Boolean value indicating whether to discard + unknown properties. A server may send a response that includes additional + properties that are not known by the client in the following scenarios: + 1. The OpenAPI document is incomplete, i.e. it does not match the server + implementation. + 2. The client was generated using an older version of the OpenAPI document + and the server has been upgraded since then. + If a schema in the OpenAPI document defines the additionalProperties attribute, + then all undeclared properties received by the server are injected into the + additional properties map. In that case, there are undeclared properties, and + nothing to discard. + :param disabled_client_side_validations (string): Comma-separated list of + JSON schema validation keywords to disable JSON schema structural validation + rules. The following keywords may be specified: multipleOf, maximum, + exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern, + maxItems, minItems. + By default, the validation is performed for data generated locally by the client + and data received from the server, independent of any validation performed by + the server side. If the input data does not satisfy the JSON schema validation + rules specified in the OpenAPI document, an exception is raised. + If disabled_client_side_validations is set, structural validation is + disabled. This can be useful to troubleshoot data validation problem, such as + when the OpenAPI document validation rules do not match the actual API data + received by the server. + :param server_operation_index: Mapping from operation ID to an index to server + configuration. + :param server_operation_variables: Mapping from operation ID to a mapping with + string values to replace variables in templated server configuration. + The validation of enums is performed for variables with defined enum values before. + :param ssl_ca_cert: str - the path to a file of concatenated CA certificates + in PEM format + + :Example: + + API Key Authentication Example. 
+ Given the following security scheme in the OpenAPI specification: + components: + securitySchemes: + cookieAuth: # name for the security scheme + type: apiKey + in: cookie + name: JSESSIONID # cookie name + + You can programmatically set the cookie: + + conf = pinecone.config.openapi_configuration.Configuration( + api_key={'cookieAuth': 'abc123'}, + api_key_prefix={'cookieAuth': 'JSESSIONID'} + ) + + The following cookie will be added to the HTTP request: + Cookie: JSESSIONID abc123 + """ + + _default = None + + def __init__( + self, + host=None, + api_key=None, + api_key_prefix=None, + discard_unknown_keys=False, + disabled_client_side_validations="", + server_index=None, + server_variables=None, + server_operation_index=None, + server_operation_variables=None, + ssl_ca_cert=None, + ): + """Constructor""" + self._base_path = "https://api.pinecone.io" if host is None else host + """Default Base url + """ + self.server_index = 0 if server_index is None and host is None else server_index + self.server_operation_index = server_operation_index or {} + """Default server index + """ + self.server_variables = server_variables or {} + self.server_operation_variables = server_operation_variables or {} + """Default server variables + """ + self.temp_folder_path = None + """Temp file folder for downloading files + """ + # Authentication Settings + self.api_key = {} + if api_key: + self.api_key = api_key + """dict to store API key(s) + """ + self.api_key_prefix = {} + if api_key_prefix: + self.api_key_prefix = api_key_prefix + """dict to store API prefix (e.g. Bearer) + """ + self.refresh_api_key_hook = None + """function hook to refresh API key if expired + """ + self.discard_unknown_keys = discard_unknown_keys + self.disabled_client_side_validations = disabled_client_side_validations + self.logger = {} + """Logging Settings + """ + self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") + self.logger["urllib3_logger"] = logging.getLogger("urllib3") + self.logger_format = "%(asctime)s %(levelname)s %(message)s" + """Log format + """ + self.logger_stream_handler = None + """Log stream handler + """ + self.logger_file_handler = None + """Log file handler + """ + self.logger_file = None + """Debug file location + """ + self.debug = False + """Debug switch + """ + + self.verify_ssl = True + """SSL/TLS verification + Set this to false to skip verifying SSL certificate when calling API + from https server. + """ + self.ssl_ca_cert = ssl_ca_cert + """Set this to customize the certificate file to verify the peer. + """ + self.cert_file = None + """client certificate file + """ + self.key_file = None + """client key file + """ + self.assert_hostname = None + """Set this to True/False to enable/disable SSL hostname verification. + """ + + self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 + """urllib3 connection pool's maximum number of connections saved + per pool. urllib3 uses 1 connection as default value, but this is + not the best value when you are making a lot of possibly parallel + requests to the same host, which is often the case here. + cpu_count * 5 is used as default value to increase performance.
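A brief aside before the attribute docstring closes below: since this vendored `Configuration` replaces the old `pinecone.openapi_support.configuration` import, a hedged usage sketch may help. The constructor parameters and the `ApiKeyAuth`/`Api-Key` scheme names are taken from this file (see `auth_settings()` further down); the key value is a placeholder.

```python
# Illustrative only; mirrors the vendored class added in this diff at
# pinecone/config/openapi_configuration.py.
from pinecone.config.openapi_configuration import Configuration

conf = Configuration(
    host="https://api.pinecone.io",
    api_key={"ApiKeyAuth": "pc-example-key"},  # placeholder secret
)
conf.connection_pool_maxsize = 20  # override the cpu_count * 5 default

# auth_settings() (defined later in the file) maps this to the Api-Key header:
assert conf.get_api_key_with_prefix("ApiKeyAuth") == "pc-example-key"
assert conf.auth_settings()["ApiKeyAuth"]["key"] == "Api-Key"
```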
+ """ + + self.proxy = None + """Proxy URL + """ + self.proxy_headers = None + """Proxy headers + """ + self.safe_chars_for_path_param = "" + """Safe chars for path_param + """ + self.retries = None + """Adding retries to override urllib3 default value 3 + """ + # Enable client side validation + self.client_side_validation = True + + # Options to pass down to the underlying urllib3 socket + self.socket_options = None + + def __deepcopy__(self, memo): + cls = self.__class__ + result = cls.__new__(cls) + memo[id(self)] = result + for k, v in self.__dict__.items(): + if k not in ("logger", "logger_file_handler"): + setattr(result, k, copy.deepcopy(v, memo)) + # shallow copy of loggers + result.logger = copy.copy(self.logger) + # use setters to configure loggers + result.logger_file = self.logger_file + result.debug = self.debug + return result + + def __setattr__(self, name, value): + object.__setattr__(self, name, value) + if name == "disabled_client_side_validations": + s = set(filter(None, value.split(","))) + for v in s: + if v not in JSON_SCHEMA_VALIDATION_KEYWORDS: + raise PineconeApiValueError("Invalid keyword: '{0}''".format(v)) + self._disabled_client_side_validations = s + + @classmethod + def set_default(cls, default): + """Set default instance of configuration. + + It stores default configuration, which can be + returned by get_default_copy method. + + :param default: object of Configuration + """ + cls._default = copy.deepcopy(default) + + @classmethod + def get_default_copy(cls): + """Return new instance of configuration. + + This method returns newly created, based on default constructor, + object of Configuration class or returns a copy of default + configuration passed by the set_default method. + + :return: The configuration object. + """ + if cls._default is not None: + return copy.deepcopy(cls._default) + return Configuration() + + @property + def logger_file(self): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + return self.__logger_file + + @logger_file.setter + def logger_file(self, value): + """The logger file. + + If the logger_file is None, then add stream handler and remove file + handler. Otherwise, add file handler and remove stream handler. + + :param value: The logger_file path. + :type: str + """ + self.__logger_file = value + if self.__logger_file: + # If set logging file, + # then add file handler and remove stream handler. + self.logger_file_handler = logging.FileHandler(self.__logger_file) + self.logger_file_handler.setFormatter(self.logger_formatter) + for _, logger in self.logger.items(): + logger.addHandler(self.logger_file_handler) + + @property + def debug(self): + """Debug status + + :param value: The debug status, True or False. + :type: bool + """ + return self.__debug + + @debug.setter + def debug(self, value): + """Debug status + + :param value: The debug status, True or False. 
+ :type: bool + """ + self.__debug = value + if self.__debug: + # if debug status is True, turn on debug logging + for _, logger in self.logger.items(): + logger.setLevel(logging.DEBUG) + # turn on http_client debug + http_client.HTTPConnection.debuglevel = 1 + else: + # if debug status is False, turn off debug logging, + # setting log level to default `logging.WARNING` + for _, logger in self.logger.items(): + logger.setLevel(logging.WARNING) + # turn off http_client debug + http_client.HTTPConnection.debuglevel = 0 + + @property + def logger_format(self): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + return self.__logger_format + + @logger_format.setter + def logger_format(self, value): + """The logger format. + + The logger_formatter will be updated when sets logger_format. + + :param value: The format string. + :type: str + """ + self.__logger_format = value + self.logger_formatter = logging.Formatter(self.__logger_format) + + def get_api_key_with_prefix(self, identifier, alias=None): + """Gets API key (with prefix if set). + + :param identifier: The identifier of apiKey. + :param alias: The alternative identifier of apiKey. + :return: The token for api key authentication. + """ + if self.refresh_api_key_hook is not None: + self.refresh_api_key_hook(self) + key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) + if key: + prefix = self.api_key_prefix.get(identifier) + if prefix: + return "%s %s" % (prefix, key) + else: + return key + + def auth_settings(self): + """Gets Auth Settings dict for api client. + + :return: The Auth Settings information dict. + """ + auth = {} + if "ApiKeyAuth" in self.api_key: + auth["ApiKeyAuth"] = { + "type": "api_key", + "in": "header", + "key": "Api-Key", + "value": self.get_api_key_with_prefix("ApiKeyAuth"), + } + return auth + + def get_host_settings(self): + """Gets an array of host settings + + :return: An array of host settings + """ + return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] + + def get_host_from_settings(self, index, variables=None, servers=None): + """Gets host URL based on the index and variables + :param index: array index of the host settings + :param variables: hash of variable and the corresponding value + :param servers: an array of host settings or None + :return: URL based on host settings + """ + if index is None: + return self._base_path + + variables = {} if variables is None else variables + servers = self.get_host_settings() if servers is None else servers + + try: + server = servers[index] + except IndexError: + raise ValueError( + "Invalid index {0} when selecting the host settings. Must be less than {1}".format( + index, len(servers) + ) + ) + + url = server["url"] + + # go through variables and replace placeholders + for variable_name, variable in server.get("variables", {}).items(): + used_value = variables.get(variable_name, variable["default_value"]) + + if "enum_values" in variable and used_value not in variable["enum_values"]: + raise ValueError( + "The variable `{0}` in the host URL has invalid value {1}. 
Must be {2}.".format( + variable_name, variables[variable_name], variable["enum_values"] + ) + ) + + url = url.replace("{" + variable_name + "}", used_value) + + return url + + @property + def host(self): + """Return generated host.""" + return self.get_host_from_settings(self.server_index, variables=self.server_variables) + + @host.setter + def host(self, value): + """Fix base path.""" + self._base_path = value + self.server_index = None + + def __repr__(self): + attrs = [ + f"host={self.host}", + "api_key=***", + f"api_key_prefix={self.api_key_prefix}", + f"connection_pool_maxsize={self.connection_pool_maxsize}", + f"discard_unknown_keys={self.discard_unknown_keys}", + f"disabled_client_side_validations={self.disabled_client_side_validations}", + f"server_index={self.server_index}", + f"server_variables={self.server_variables}", + f"server_operation_index={self.server_operation_index}", + f"server_operation_variables={self.server_operation_variables}", + f"ssl_ca_cert={self.ssl_ca_cert}", + ] + return f"Configuration({', '.join(attrs)})" diff --git a/pinecone/control/__init__.py b/pinecone/control/__init__.py index a26e352a..b45bc64e 100644 --- a/pinecone/control/__init__.py +++ b/pinecone/control/__init__.py @@ -1,6 +1,9 @@ -from .pinecone import Pinecone -from .pinecone_asyncio import PineconeAsyncio +import warnings -from .repr_overrides import install_repr_overrides +from pinecone.db_control import * -install_repr_overrides() +warnings.warn( + "The module at `pinecone.control` has moved to `pinecone.db_control`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) diff --git a/pinecone/control/pinecone.py b/pinecone/control/pinecone.py deleted file mode 100644 index f3c8f404..00000000 --- a/pinecone/control/pinecone.py +++ /dev/null @@ -1,354 +0,0 @@ -import time -import logging -from typing import Optional, Dict, Union -from multiprocessing import cpu_count - -from .index_host_store import IndexHostStore -from .pinecone_interface import PineconeDBControlInterface - -from pinecone.config import PineconeConfig, ConfigBuilder - -from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi -from pinecone.openapi_support.api_client import ApiClient - - -from pinecone.utils import normalize_host, setup_openapi_client, PluginAware -from pinecone.core.openapi.db_control import API_VERSION -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexModel, - IndexList, - CollectionList, - IndexEmbed, -) -from .langchain_import_warnings import _build_langchain_attribute_error_message -from pinecone.utils import docslinks -from pinecone.data import _Index, _Inference, _IndexAsyncio - -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict -from .request_factory import PineconeDBControlRequestFactory - -logger = logging.getLogger(__name__) -""" @private """ - - -class Pinecone(PineconeDBControlInterface, PluginAware): - """ - A client for interacting with Pinecone's vector database. - - This class implements methods for managing and interacting with Pinecone resources - such as collections and indexes. 
- """ - - def __init__( - self, - api_key: Optional[str] = None, - host: Optional[str] = None, - proxy_url: Optional[str] = None, - proxy_headers: Optional[Dict[str, str]] = None, - ssl_ca_certs: Optional[str] = None, - ssl_verify: Optional[bool] = None, - additional_headers: Optional[Dict[str, str]] = {}, - pool_threads: Optional[int] = None, - **kwargs, - ): - for deprecated_kwarg in {"config", "openapi_config", "index_api"}: - if deprecated_kwarg in kwargs: - raise NotImplementedError( - f"Passing {deprecated_kwarg} is no longer supported. Please pass individual settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at {docslinks['README']} for examples." - ) - - self.config = PineconeConfig.build( - api_key=api_key, - host=host, - additional_headers=additional_headers, - proxy_url=proxy_url, - proxy_headers=proxy_headers, - ssl_ca_certs=ssl_ca_certs, - ssl_verify=ssl_verify, - **kwargs, - ) - """ @private """ - - self.openapi_config = ConfigBuilder.build_openapi_config(self.config, **kwargs) - """ @private """ - - if pool_threads is None: - self.pool_threads = 5 * cpu_count() - """ @private """ - else: - self.pool_threads = pool_threads - """ @private """ - - self._inference = None # Lazy initialization - """ @private """ - - self.index_api = setup_openapi_client( - api_client_klass=ApiClient, - api_klass=ManageIndexesApi, - config=self.config, - openapi_config=self.openapi_config, - pool_threads=pool_threads, - api_version=API_VERSION, - ) - """ @private """ - - self.index_host_store = IndexHostStore() - """ @private """ - - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads - ) - - @property - def inference(self): - """ - Inference is a namespace where an instance of the `pinecone.data.features.inference.inference.Inference` class is lazily created and cached. 
- """ - if self._inference is None: - self._inference = _Inference(config=self.config, openapi_config=self.openapi_config) - return self._inference - - def create_index( - self, - name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], - dimension: Optional[int] = None, - metric: Optional[Union[Metric, str]] = Metric.COSINE, - timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, - tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_request( - name=name, - spec=spec, - dimension=dimension, - metric=metric, - deletion_protection=deletion_protection, - vector_type=vector_type, - tags=tags, - ) - resp = self.index_api.create_index(create_index_request=req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) - - def create_index_for_model( - self, - name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], - tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - timeout: Optional[int] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_for_model_request( - name=name, - cloud=cloud, - region=region, - embed=embed, - tags=tags, - deletion_protection=deletion_protection, - ) - resp = self.index_api.create_index_for_model(req) - - if timeout == -1: - return IndexModel(resp) - return self.__poll_describe_index_until_ready(name, timeout) - - def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): - description = None - - def is_ready() -> bool: - nonlocal description - description = self.describe_index(name=name) - return description.status.ready - - total_wait_time = 0 - if timeout is None: - # Wait indefinitely - while not is_ready(): - logger.debug( - f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." - ) - total_wait_time += 5 - time.sleep(5) - - else: - # Wait for a maximum of timeout seconds - while not is_ready(): - if timeout < 0: - logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] - timeout_msg = ( - f"Please call describe_index() to confirm index status. See docs at {link}" - ) - raise TimeoutError(timeout_msg) - - logger.debug( - f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - time.sleep(5) - timeout -= 5 - - return description - - def delete_index(self, name: str, timeout: Optional[int] = None): - self.index_api.delete_index(name) - self.index_host_store.delete_host(self.config, name) - - if timeout == -1: - return - - if timeout is None: - while self.has_index(name): - time.sleep(5) - else: - while self.has_index(name) and timeout >= 0: - time.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) - - def list_indexes(self) -> IndexList: - response = self.index_api.list_indexes() - return IndexList(response) - - def describe_index(self, name: str) -> IndexModel: - api_instance = self.index_api - description = api_instance.describe_index(name) - host = description.host - self.index_host_store.set_host(self.config, name, host) - - return IndexModel(description) - - def has_index(self, name: str) -> bool: - if name in self.list_indexes().names(): - return True - else: - return False - - def configure_index( - self, - name: str, - replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, - tags: Optional[Dict[str, str]] = None, - ): - api_instance = self.index_api - description = self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, - replicas=replicas, - pod_type=pod_type, - deletion_protection=deletion_protection, - tags=tags, - ) - api_instance.configure_index(name, configure_index_request=req) - - def create_collection(self, name: str, source: str) -> None: - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - self.index_api.create_collection(create_collection_request=req) - - def list_collections(self) -> CollectionList: - response = self.index_api.list_collections() - return CollectionList(response) - - def delete_collection(self, name: str) -> None: - self.index_api.delete_collection(name) - - def describe_collection(self, name: str): - return self.index_api.describe_collection(name).to_dict() - - @staticmethod - def from_texts(*args, **kwargs): - """@private""" - raise AttributeError(_build_langchain_attribute_error_message("from_texts")) - - @staticmethod - def from_documents(*args, **kwargs): - """@private""" - raise AttributeError(_build_langchain_attribute_error_message("from_documents")) - - def Index(self, name: str = "", host: str = "", **kwargs): - if name == "" and host == "": - raise ValueError("Either name or host must be specified") - - pt = kwargs.pop("pool_threads", None) or self.pool_threads - api_key = self.config.api_key - openapi_config = self.openapi_config - - if host != "": - check_realistic_host(host) - - # Use host url if it is provided - index_host = normalize_host(host) - else: - # Otherwise, get host url from describe_index using the index name - index_host = self.index_host_store.get_host(self.index_api, self.config, name) - - return _Index( - host=index_host, - api_key=api_key, - pool_threads=pt, - openapi_config=openapi_config, - source_tag=self.config.source_tag, - **kwargs, - ) - - def IndexAsyncio(self, host: str, **kwargs): - api_key = self.config.api_key - openapi_config = self.openapi_config - - if host is None or host == "": - raise ValueError("A host must be specified") - - 
check_realistic_host(host) - index_host = normalize_host(host) - - return _IndexAsyncio( - host=index_host, - api_key=api_key, - openapi_config=openapi_config, - source_tag=self.config.source_tag, - **kwargs, - ) - - -def check_realistic_host(host: str) -> None: - """@private - - Checks whether a user-provided host string seems plausible. - Someone could erroneously pass an index name as the host by - mistake, and if they have done that we'd like to give them a - simple error message as feedback rather than attempting to - call the url and getting a more cryptic DNS resolution error. - """ - - if "." not in host and "localhost" not in host: - raise ValueError( - f"You passed '{host}' as the host but this does not appear to be valid. Call describe_index() to confirm the host of the index." - ) diff --git a/pinecone/control/pinecone_asyncio.py b/pinecone/control/pinecone_asyncio.py deleted file mode 100644 index 1373c8e4..00000000 --- a/pinecone/control/pinecone_asyncio.py +++ /dev/null @@ -1,340 +0,0 @@ -import logging -import asyncio -from typing import Optional, Dict, Union - -from pinecone.config import PineconeConfig, ConfigBuilder - -from pinecone.core.openapi.db_control.api.manage_indexes_api import AsyncioManageIndexesApi -from pinecone.openapi_support import AsyncioApiClient - -from pinecone.utils import normalize_host, setup_async_openapi_client -from pinecone.core.openapi.db_control import API_VERSION -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexModel, - IndexList, - CollectionList, - IndexEmbed, -) -from pinecone.utils import docslinks - -from pinecone.data import _IndexAsyncio, _AsyncioInference -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict -from .request_factory import PineconeDBControlRequestFactory -from .pinecone_interface_asyncio import PineconeAsyncioDBControlInterface -from .pinecone import check_realistic_host - -logger = logging.getLogger(__name__) -""" @private """ - - -class PineconeAsyncio(PineconeAsyncioDBControlInterface): - """ - `PineconeAsyncio` is an asyncio client for interacting with Pinecone's control plane API. - - This class implements methods for managing and interacting with Pinecone resources - such as collections and indexes. - - To perform data operations such as inserting and querying vectors, use the `IndexAsyncio` class. - - ```python - import asyncio - from pinecone import Pinecone - - async def main(): - pc = Pinecone() - async with pc.IndexAsyncio(host="my-index.pinecone.io") as idx: - await idx.upsert(vectors=[(1, [1, 2, 3]), (2, [4, 5, 6])]) - - asyncio.run(main()) - ``` - """ - - def __init__( - self, - api_key: Optional[str] = None, - host: Optional[str] = None, - proxy_url: Optional[str] = None, - # proxy_headers: Optional[Dict[str, str]] = None, - ssl_ca_certs: Optional[str] = None, - ssl_verify: Optional[bool] = None, - additional_headers: Optional[Dict[str, str]] = {}, - **kwargs, - ): - for deprecated_kwarg in {"config", "openapi_config"}: - if deprecated_kwarg in kwargs: - raise NotImplementedError( - f"Passing {deprecated_kwarg} is no longer supported. Please pass individual settings such as proxy_url, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at {docslinks['README']} for examples." 
- ) - - for unimplemented_kwarg in {"proxy_headers"}: - if unimplemented_kwarg in kwargs: - raise NotImplementedError( - f"You have passed {unimplemented_kwarg} but this configuration has not been implemented for PineconeAsyncio." - ) - - self.config = PineconeConfig.build( - api_key=api_key, - host=host, - additional_headers=additional_headers, - proxy_url=proxy_url, - proxy_headers=None, - ssl_ca_certs=ssl_ca_certs, - ssl_verify=ssl_verify, - **kwargs, - ) - """ @private """ - - self.openapi_config = ConfigBuilder.build_openapi_config(self.config, **kwargs) - """ @private """ - - self._inference = None # Lazy initialization - """ @private """ - - self.index_api = setup_async_openapi_client( - api_client_klass=AsyncioApiClient, - api_klass=AsyncioManageIndexesApi, - config=self.config, - openapi_config=self.openapi_config, - api_version=API_VERSION, - ) - """ @private """ - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_value, traceback): - await self.close() - - async def close(self): - """Cleanup resources used by the Pinecone client. - - This method should be called when the client is no longer needed so that - it can cleanup the aioahttp session and other resources. - - After close has been called, the client instance should not be used. - - ```python - import asyncio - from pinecone import PineconeAsyncio - - async def main(): - pc = PineconeAsyncio() - desc = await pc.describe_index(name="my-index") - await pc.close() - - asyncio.run(main()) - ``` - - If you are using the client as a context manager, the close method is called automatically - when exiting. - - ```python - import asyncio - from pinecone import PineconeAsyncio - - async def main(): - async with PineconeAsyncio() as pc: - desc = await pc.describe_index(name="my-index") - - # No need to call close in this case because the "async with" syntax - # automatically calls close when exiting the block. 
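(The docstring example resumes below.) The deleted `__poll_describe_index_until_ready` helpers, sync earlier in this diff and async just below, share one shape: check readiness, sleep five seconds, and optionally burn down a timeout budget before raising `TimeoutError`. A simplified sync restatement of that loop; the real helpers also log progress and embed a docs link in the error message.

```python
# Simplified sketch of the poll-until-ready loop from the deleted
# implementations; the async variant swaps time.sleep for asyncio.sleep.
import time
from typing import Callable, Optional

def poll_until_ready(is_ready: Callable[[], bool],
                     timeout: Optional[int] = None,
                     interval: int = 5) -> None:
    while not is_ready():
        if timeout is not None:
            if timeout < 0:
                raise TimeoutError("Timed out; call describe_index() to confirm index status.")
            timeout -= interval  # spend part of the budget each cycle
        time.sleep(interval)
```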
- asyncio.run(main()) - ``` - - """ - await self.index_api.api_client.close() - - @property - def inference(self): - """Dynamically create and cache the Inference instance.""" - if self._inference is None: - self._inference = _AsyncioInference(api_client=self.index_api.api_client) - return self._inference - - async def create_index( - self, - name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], - dimension: Optional[int] = None, - metric: Optional[Union[Metric, str]] = Metric.COSINE, - timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, - tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_request( - name=name, - spec=spec, - dimension=dimension, - metric=metric, - deletion_protection=deletion_protection, - vector_type=vector_type, - tags=tags, - ) - resp = await self.index_api.create_index(create_index_request=req) - - if timeout == -1: - return IndexModel(resp) - return await self.__poll_describe_index_until_ready(name, timeout) - - async def create_index_for_model( - self, - name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], - tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - timeout: Optional[int] = None, - ) -> IndexModel: - req = PineconeDBControlRequestFactory.create_index_for_model_request( - name=name, - cloud=cloud, - region=region, - embed=embed, - tags=tags, - deletion_protection=deletion_protection, - ) - resp = await self.index_api.create_index_for_model(req) - - if timeout == -1: - return IndexModel(resp) - return await self.__poll_describe_index_until_ready(name, timeout) - - async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): - description = None - - async def is_ready() -> bool: - nonlocal description - description = await self.describe_index(name=name) - return description.status.ready - - total_wait_time = 0 - if timeout is None: - # Wait indefinitely - while not await is_ready(): - logger.debug( - f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." - ) - total_wait_time += 5 - await asyncio.sleep(5) - - else: - # Wait for a maximum of timeout seconds - while not await is_ready(): - if timeout < 0: - logger.error(f"Index {name} is not ready. Timeout reached.") - link = docslinks["API_DESCRIBE_INDEX"] - timeout_msg = ( - f"Please call describe_index() to confirm index status. See docs at {link}" - ) - raise TimeoutError(timeout_msg) - - logger.debug( - f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" - ) - total_wait_time += 5 - await asyncio.sleep(5) - timeout -= 5 - - return description - - async def delete_index(self, name: str, timeout: Optional[int] = None): - await self.index_api.delete_index(name) - - if timeout == -1: - return - - if timeout is None: - while await self.has_index(name): - await asyncio.sleep(5) - else: - while await self.has_index(name) and timeout >= 0: - await asyncio.sleep(5) - timeout -= 5 - if timeout and timeout < 0: - raise ( - TimeoutError( - "Please call the list_indexes API ({}) to confirm if index is deleted".format( - "https://www.pinecone.io/docs/api/operation/list_indexes/" - ) - ) - ) - - async def list_indexes(self) -> IndexList: - response = await self.index_api.list_indexes() - return IndexList(response) - - async def describe_index(self, name: str) -> IndexModel: - description = await self.index_api.describe_index(name) - return IndexModel(description) - - async def has_index(self, name: str) -> bool: - available_indexes = await self.list_indexes() - if name in available_indexes.names(): - return True - else: - return False - - async def configure_index( - self, - name: str, - replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, - tags: Optional[Dict[str, str]] = None, - ): - description = await self.describe_index(name=name) - - req = PineconeDBControlRequestFactory.configure_index_request( - description=description, - replicas=replicas, - pod_type=pod_type, - deletion_protection=deletion_protection, - tags=tags, - ) - await self.index_api.configure_index(name, configure_index_request=req) - - async def create_collection(self, name: str, source: str): - req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) - await self.index_api.create_collection(create_collection_request=req) - - async def list_collections(self) -> CollectionList: - response = await self.index_api.list_collections() - return CollectionList(response) - - async def delete_collection(self, name: str): - await self.index_api.delete_collection(name) - - async def describe_collection(self, name: str): - return await self.index_api.describe_collection(name).to_dict() - - def IndexAsyncio(self, host: str, **kwargs) -> _IndexAsyncio: - api_key = self.config.api_key - openapi_config = self.openapi_config - - if host is None or host == "": - raise ValueError("A host must be specified") - - check_realistic_host(host) - index_host = normalize_host(host) - - return _IndexAsyncio( - host=index_host, - api_key=api_key, - openapi_config=openapi_config, - source_tag=self.config.source_tag, - **kwargs, - ) diff --git a/pinecone/core/openapi/db_control/__init__.py b/pinecone/core/openapi/db_control/__init__.py index 1a6949bb..31408552 100644 --- a/pinecone/core/openapi/db_control/__init__.py +++ b/pinecone/core/openapi/db_control/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/db_data/__init__.py b/pinecone/core/openapi/db_data/__init__.py index e8cbbfe1..76701561 100644 --- a/pinecone/core/openapi/db_data/__init__.py +++ b/pinecone/core/openapi/db_data/__init__.py @@ -17,7 +17,7 @@ from 
pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/core/openapi/inference/__init__.py b/pinecone/core/openapi/inference/__init__.py index d878080c..9bf0fcdb 100644 --- a/pinecone/core/openapi/inference/__init__.py +++ b/pinecone/core/openapi/inference/__init__.py @@ -17,7 +17,7 @@ from pinecone.openapi_support.api_client import ApiClient # import Configuration -from pinecone.openapi_support.configuration import Configuration +from pinecone.config.openapi_configuration import Configuration # import exceptions from pinecone.openapi_support.exceptions import PineconeException diff --git a/pinecone/data/__init__.py b/pinecone/data/__init__.py index 8e040056..3ea4cd41 100644 --- a/pinecone/data/__init__.py +++ b/pinecone/data/__init__.py @@ -1,34 +1,10 @@ -from .index import ( - Index as _Index, - FetchResponse, - QueryResponse, - DescribeIndexStatsResponse, - UpsertResponse, - SparseValues, - Vector, -) -from .dataclasses import * -from .import_error import ( - Index, - IndexClientInstantiationError, - Inference, - InferenceInstantiationError, -) -from .index_asyncio import * -from .errors import ( - VectorDictionaryMissingKeysError, - VectorDictionaryExcessKeysError, - VectorTupleLengthError, - SparseValuesTypeError, - SparseValuesMissingKeysError, - SparseValuesDictionaryExpectedError, - MetadataDictionaryExpectedError, -) +import warnings + +from pinecone.db_data import * -from .features.bulk_import import ImportErrorMode -from .features.inference import ( - Inference as _Inference, - AsyncioInference as _AsyncioInference, - RerankModel, - EmbedModel, +warnings.warn( + "The module at `pinecone.data` has moved to `pinecone.db_data`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, ) diff --git a/pinecone/data/features/__init__.py b/pinecone/data/features/__init__.py index e69de29b..e4ff12ee 100644 --- a/pinecone/data/features/__init__.py +++ b/pinecone/data/features/__init__.py @@ -0,0 +1,10 @@ +import warnings + +from pinecone.db_data.features import * + +warnings.warn( + "The module at `pinecone.data.features` has moved to `pinecone.db_data.features`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) diff --git a/pinecone/data/features/bulk_imports/__init__.py b/pinecone/data/features/bulk_imports/__init__.py new file mode 100644 index 00000000..3af0d1f5 --- /dev/null +++ b/pinecone/data/features/bulk_imports/__init__.py @@ -0,0 +1,10 @@ +import warnings + +from pinecone.db_data.features.bulk_import import * + +warnings.warn( + "The module at `pinecone.data.features.bulk_import` has moved to `pinecone.db_data.features.bulk_import`. " + "Please update your imports. 
" + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) diff --git a/pinecone/data/features/inference/__init__.py b/pinecone/data/features/inference/__init__.py index 30e93330..0280f382 100644 --- a/pinecone/data/features/inference/__init__.py +++ b/pinecone/data/features/inference/__init__.py @@ -1,6 +1,10 @@ -from .repl_overrides import install_repl_overrides -from .inference import Inference -from .inference_asyncio import AsyncioInference -from .inference_request_builder import RerankModel, EmbedModel +import warnings -install_repl_overrides() +from pinecone.inference import * + +warnings.warn( + "The module at `pinecone.data.features.inference` has moved to `pinecone.inference`. " + "Please update your imports. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) diff --git a/pinecone/db_control/__init__.py b/pinecone/db_control/__init__.py new file mode 100644 index 00000000..73d82468 --- /dev/null +++ b/pinecone/db_control/__init__.py @@ -0,0 +1,7 @@ +from .enums import * +from .models import * +from .db_control import DBControl +from .db_control_asyncio import DBControlAsyncio +from .repr_overrides import install_repr_overrides + +install_repr_overrides() diff --git a/pinecone/db_control/db_control.py b/pinecone/db_control/db_control.py new file mode 100644 index 00000000..69aef889 --- /dev/null +++ b/pinecone/db_control/db_control.py @@ -0,0 +1,60 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi +from pinecone.openapi_support.api_client import ApiClient + +from pinecone.utils import setup_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + from .resources.sync.index import IndexResource + from .resources.sync.collection import CollectionResource + + +class DBControl: + def __init__(self, config, openapi_config, pool_threads): + self._config = config + """ @private """ + + self._openapi_config = openapi_config + """ @private """ + + self._pool_threads = pool_threads + """ @private """ + + self._index_api = setup_openapi_client( + api_client_klass=ApiClient, + api_klass=ManageIndexesApi, + config=self._config, + openapi_config=self._openapi_config, + pool_threads=self._pool_threads, + api_version=API_VERSION, + ) + """ @private """ + + self._index_resource: Optional["IndexResource"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResource"] = None + """ @private """ + + @property + def index(self) -> "IndexResource": + if self._index_resource is None: + from .resources.sync.index import IndexResource + + self._index_resource = IndexResource(index_api=self._index_api, config=self._config) + return self._index_resource + + @property + def collection(self) -> "CollectionResource": + if self._collection_resource is None: + from .resources.sync.collection import CollectionResource + + self._collection_resource = CollectionResource(self._index_api) + return self._collection_resource diff --git a/pinecone/db_control/db_control_asyncio.py b/pinecone/db_control/db_control_asyncio.py new file mode 100644 index 00000000..91e3f179 --- /dev/null +++ b/pinecone/db_control/db_control_asyncio.py @@ -0,0 +1,58 @@ +import logging +from typing import Optional, TYPE_CHECKING + +from pinecone.core.openapi.db_control.api.manage_indexes_api import 
AsyncioManageIndexesApi +from pinecone.openapi_support import AsyncioApiClient + +from pinecone.utils import setup_async_openapi_client +from pinecone.core.openapi.db_control import API_VERSION + +logger = logging.getLogger(__name__) +""" @private """ + + +if TYPE_CHECKING: + from .resources.asyncio.index import IndexResourceAsyncio + from .resources.asyncio.collection import CollectionResourceAsyncio + + +class DBControlAsyncio: + def __init__(self, config, openapi_config): + self._config = config + """ @private """ + + self._openapi_config = openapi_config + """ @private """ + + self._index_api = setup_async_openapi_client( + api_client_klass=AsyncioApiClient, + api_klass=AsyncioManageIndexesApi, + config=self._config, + openapi_config=self._openapi_config, + api_version=API_VERSION, + ) + """ @private """ + + self._index_resource: Optional["IndexResourceAsyncio"] = None + """ @private """ + + self._collection_resource: Optional["CollectionResourceAsyncio"] = None + """ @private """ + + @property + def index(self) -> "IndexResourceAsyncio": + if self._index_resource is None: + from .resources.asyncio.index import IndexResourceAsyncio + + self._index_resource = IndexResourceAsyncio( + index_api=self._index_api, config=self._config + ) + return self._index_resource + + @property + def collection(self) -> "CollectionResourceAsyncio": + if self._collection_resource is None: + from .resources.asyncio.collection import CollectionResourceAsyncio + + self._collection_resource = CollectionResourceAsyncio(self._index_api) + return self._collection_resource diff --git a/pinecone/enums/__init__.py b/pinecone/db_control/enums/__init__.py similarity index 100% rename from pinecone/enums/__init__.py rename to pinecone/db_control/enums/__init__.py diff --git a/pinecone/enums/clouds.py b/pinecone/db_control/enums/clouds.py similarity index 92% rename from pinecone/enums/clouds.py rename to pinecone/db_control/enums/clouds.py index 192b3da5..8903f40a 100644 --- a/pinecone/enums/clouds.py +++ b/pinecone/db_control/enums/clouds.py @@ -3,10 +3,10 @@ class CloudProvider(Enum): """Cloud providers available for use with Pinecone serverless indexes - + This list could expand or change over time as more cloud providers are supported. - Check the Pinecone documentation for the most up-to-date list of supported cloud - providers. If you want to use a cloud provider that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported cloud + providers. If you want to use a cloud provider that is not listed here, you can pass a string value directly without using this enum. """ @@ -17,10 +17,10 @@ class CloudProvider(Enum): class AwsRegion(Enum): """AWS (Amazon Web Services) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -31,10 +31,10 @@ class AwsRegion(Enum): class GcpRegion(Enum): """GCP (Google Cloud Platform) regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. 
If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ @@ -44,10 +44,10 @@ class GcpRegion(Enum): class AzureRegion(Enum): """Azure regions available for use with Pinecone serverless indexes - + This list could expand or change over time as more regions are supported. - Check the Pinecone documentation for the most up-to-date list of supported - regions. If you want to use a region that is not listed here, you can + Check the Pinecone documentation for the most up-to-date list of supported + regions. If you want to use a region that is not listed here, you can pass a string value directly without using this enum. """ diff --git a/pinecone/enums/deletion_protection.py b/pinecone/db_control/enums/deletion_protection.py similarity index 100% rename from pinecone/enums/deletion_protection.py rename to pinecone/db_control/enums/deletion_protection.py diff --git a/pinecone/enums/metric.py b/pinecone/db_control/enums/metric.py similarity index 100% rename from pinecone/enums/metric.py rename to pinecone/db_control/enums/metric.py diff --git a/pinecone/enums/pod_index_environment.py b/pinecone/db_control/enums/pod_index_environment.py similarity index 100% rename from pinecone/enums/pod_index_environment.py rename to pinecone/db_control/enums/pod_index_environment.py diff --git a/pinecone/enums/pod_type.py b/pinecone/db_control/enums/pod_type.py similarity index 100% rename from pinecone/enums/pod_type.py rename to pinecone/db_control/enums/pod_type.py diff --git a/pinecone/enums/vector_type.py b/pinecone/db_control/enums/vector_type.py similarity index 100% rename from pinecone/enums/vector_type.py rename to pinecone/db_control/enums/vector_type.py diff --git a/pinecone/control/index_host_store.py b/pinecone/db_control/index_host_store.py similarity index 100% rename from pinecone/control/index_host_store.py rename to pinecone/db_control/index_host_store.py diff --git a/pinecone/db_control/models/__init__.py b/pinecone/db_control/models/__init__.py new file mode 100644 index 00000000..34003bfe --- /dev/null +++ b/pinecone/db_control/models/__init__.py @@ -0,0 +1,20 @@ +from .index_description import ServerlessSpecDefinition, PodSpecDefinition +from .collection_description import CollectionDescription +from .serverless_spec import ServerlessSpec +from .pod_spec import PodSpec +from .index_list import IndexList +from .collection_list import CollectionList +from .index_model import IndexModel +from ...inference.models.index_embed import IndexEmbed + +__all__ = [ + "CollectionDescription", + "PodSpec", + "PodSpecDefinition", + "ServerlessSpec", + "ServerlessSpecDefinition", + "IndexList", + "CollectionList", + "IndexModel", + "IndexEmbed", +] diff --git a/pinecone/models/collection_description.py b/pinecone/db_control/models/collection_description.py similarity index 100% rename from pinecone/models/collection_description.py rename to pinecone/db_control/models/collection_description.py diff --git a/pinecone/models/collection_list.py b/pinecone/db_control/models/collection_list.py similarity index 87% rename from pinecone/models/collection_list.py rename to pinecone/db_control/models/collection_list.py index 508ec685..f36a9708 100644 --- a/pinecone/models/collection_list.py +++ b/pinecone/db_control/models/collection_list.py @@ -1,5 +1,7 @@ import json -from 
pinecone.core.openapi.db_control.models import CollectionList as OpenAPICollectionList +from pinecone.core.openapi.db_control.model.collection_list import ( + CollectionList as OpenAPICollectionList, +) class CollectionList: diff --git a/pinecone/models/index_description.py b/pinecone/db_control/models/index_description.py similarity index 100% rename from pinecone/models/index_description.py rename to pinecone/db_control/models/index_description.py diff --git a/pinecone/models/index_list.py b/pinecone/db_control/models/index_list.py similarity index 89% rename from pinecone/models/index_list.py rename to pinecone/db_control/models/index_list.py index 71242e24..e918b4f5 100644 --- a/pinecone/models/index_list.py +++ b/pinecone/db_control/models/index_list.py @@ -1,5 +1,5 @@ import json -from pinecone.core.openapi.db_control.models import IndexList as OpenAPIIndexList +from pinecone.core.openapi.db_control.model.index_list import IndexList as OpenAPIIndexList from .index_model import IndexModel from typing import List diff --git a/pinecone/models/index_model.py b/pinecone/db_control/models/index_model.py similarity index 81% rename from pinecone/models/index_model.py rename to pinecone/db_control/models/index_model.py index 7deb2d7d..75ba1f30 100644 --- a/pinecone/models/index_model.py +++ b/pinecone/db_control/models/index_model.py @@ -1,4 +1,4 @@ -from pinecone.core.openapi.db_control.models import IndexModel as OpenAPIIndexModel +from pinecone.core.openapi.db_control.model.index_model import IndexModel as OpenAPIIndexModel class IndexModel: diff --git a/pinecone/models/list_response.py b/pinecone/db_control/models/list_response.py similarity index 100% rename from pinecone/models/list_response.py rename to pinecone/db_control/models/list_response.py diff --git a/pinecone/models/pod_spec.py b/pinecone/db_control/models/pod_spec.py similarity index 100% rename from pinecone/models/pod_spec.py rename to pinecone/db_control/models/pod_spec.py diff --git a/pinecone/models/serverless_spec.py b/pinecone/db_control/models/serverless_spec.py similarity index 100% rename from pinecone/models/serverless_spec.py rename to pinecone/db_control/models/serverless_spec.py diff --git a/pinecone/control/repr_overrides.py b/pinecone/db_control/repr_overrides.py similarity index 79% rename from pinecone/control/repr_overrides.py rename to pinecone/db_control/repr_overrides.py index 98e4b4d4..714b8dfb 100644 --- a/pinecone/control/repr_overrides.py +++ b/pinecone/db_control/repr_overrides.py @@ -1,6 +1,6 @@ from pinecone.utils import install_json_repr_override -from pinecone.models.index_model import IndexModel -from pinecone.core.openapi.db_control.models import CollectionModel +from pinecone.db_control.models.index_model import IndexModel +from pinecone.core.openapi.db_control.model.collection_model import CollectionModel def install_repr_overrides(): diff --git a/pinecone/control/request_factory.py b/pinecone/db_control/request_factory.py similarity index 87% rename from pinecone/control/request_factory.py rename to pinecone/db_control/request_factory.py index d4d0ce63..719f71a1 100644 --- a/pinecone/control/request_factory.py +++ b/pinecone/db_control/request_factory.py @@ -2,27 +2,37 @@ from typing import Optional, Dict, Any, Union from enum import Enum +from pinecone.utils import parse_non_empty_args, convert_enum_to_string -from pinecone.utils import convert_enum_to_string -from pinecone.core.openapi.db_control.models import ( - CreateCollectionRequest, +from 
pinecone.core.openapi.db_control.model.create_collection_request import CreateCollectionRequest +from pinecone.core.openapi.db_control.model.create_index_for_model_request import ( CreateIndexForModelRequest, +) +from pinecone.core.openapi.db_control.model.create_index_for_model_request_embed import ( CreateIndexForModelRequestEmbed, - CreateIndexRequest, - ConfigureIndexRequest, +) +from pinecone.core.openapi.db_control.model.create_index_request import CreateIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request import ConfigureIndexRequest +from pinecone.core.openapi.db_control.model.configure_index_request_spec import ( ConfigureIndexRequestSpec, +) +from pinecone.core.openapi.db_control.model.configure_index_request_spec_pod import ( ConfigureIndexRequestSpecPod, +) +from pinecone.core.openapi.db_control.model.deletion_protection import ( DeletionProtection as DeletionProtectionModel, - IndexSpec, - IndexTags, +) +from pinecone.core.openapi.db_control.model.index_spec import IndexSpec +from pinecone.core.openapi.db_control.model.index_tags import IndexTags +from pinecone.core.openapi.db_control.model.serverless_spec import ( ServerlessSpec as ServerlessSpecModel, - PodSpec as PodSpecModel, - PodSpecMetadataConfig, ) +from pinecone.core.openapi.db_control.model.pod_spec import PodSpec as PodSpecModel +from pinecone.core.openapi.db_control.model.pod_spec_metadata_config import PodSpecMetadataConfig + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexEmbed -from pinecone.utils import parse_non_empty_args + -from pinecone.enums import ( +from pinecone.db_control.enums import ( Metric, VectorType, DeletionProtection, diff --git a/pinecone/db_control/resources/__init__.py b/pinecone/db_control/resources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pinecone/db_control/resources/asyncio/__init__.py b/pinecone/db_control/resources/asyncio/__init__.py new file mode 100644 index 00000000..9a4841d3 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResourceAsyncio +from .collection import CollectionResourceAsyncio diff --git a/pinecone/db_control/resources/asyncio/collection.py b/pinecone/db_control/resources/asyncio/collection.py new file mode 100644 index 00000000..33c1f3d0 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/collection.py @@ -0,0 +1,33 @@ +import logging +from typing import TYPE_CHECKING + + +from pinecone.db_control.models import CollectionList + +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + pass + + +class CollectionResourceAsyncio: + def __init__(self, index_api): + self.index_api = index_api + + async def create(self, name: str, source: str): + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + await self.index_api.create_collection(create_collection_request=req) + + async def list(self) -> CollectionList: + response = await self.index_api.list_collections() + return CollectionList(response) + + async def delete(self, name: str): + await self.index_api.delete_collection(name) + + async def describe(self, name: str): + response = await self.index_api.describe_collection(name) + return response.to_dict() diff --git a/pinecone/db_control/resources/asyncio/index.py b/pinecone/db_control/resources/asyncio/index.py new file
mode 100644 index 00000000..2d93ae01 --- /dev/null +++ b/pinecone/db_control/resources/asyncio/index.py @@ -0,0 +1,175 @@ +import logging +import asyncio +from typing import Optional, Dict, Union + + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION + + +logger = logging.getLogger(__name__) +""" @private """ + + +class IndexResourceAsyncio: + def __init__(self, index_api, config): + self.index_api = index_api + self.config = config + + async def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = await self.index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = await self.index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return await self.__poll_describe_index_until_ready(name, timeout) + + async def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + async def is_ready() -> bool: + nonlocal description + description = await self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not await is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + await asyncio.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not await is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + await asyncio.sleep(5) + timeout -= 5 + + return description + + async def delete(self, name: str, timeout: Optional[int] = None): + await self.index_api.delete_index(name) + + if timeout == -1: + return + + if timeout is None: + while await self.has(name): + await asyncio.sleep(5) + else: + while await self.has(name) and timeout >= 0: + await asyncio.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + async def list(self) -> IndexList: + response = await self.index_api.list_indexes() + return IndexList(response) + + async def describe(self, name: str) -> IndexModel: + description = await self.index_api.describe_index(name) + return IndexModel(description) + + async def has(self, name: str) -> bool: + available_indexes = await self.list() + if name in available_indexes.names(): + return True + else: + return False + + async def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + description = await self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + await self.index_api.configure_index(name, configure_index_request=req) diff --git a/pinecone/db_control/resources/sync/__init__.py b/pinecone/db_control/resources/sync/__init__.py new file mode 100644 index 00000000..cc904d53 --- /dev/null +++ b/pinecone/db_control/resources/sync/__init__.py @@ -0,0 +1,2 @@ +from .index import IndexResource +from .collection import CollectionResource diff --git a/pinecone/db_control/resources/sync/collection.py b/pinecone/db_control/resources/sync/collection.py new file mode 100644 index 00000000..1d8d11d8 --- /dev/null +++ b/pinecone/db_control/resources/sync/collection.py @@ -0,0 +1,27 @@ +import logging + +from pinecone.db_control.models import CollectionList +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory + +logger = logging.getLogger(__name__) +""" @private """ + + +class CollectionResource: + def __init__(self, index_api): + self.index_api = index_api + """ @private """ + + def create(self, name: str, source: str) -> None: + req = PineconeDBControlRequestFactory.create_collection_request(name=name, source=source) + self.index_api.create_collection(create_collection_request=req) + + def list(self) -> CollectionList: + response = self.index_api.list_collections() + return CollectionList(response) + + def delete(self, name: str) -> None: + self.index_api.delete_collection(name) + + def describe(self, name: str): + return self.index_api.describe_collection(name).to_dict() diff --git a/pinecone/db_control/resources/sync/index.py b/pinecone/db_control/resources/sync/index.py new file mode 100644 index 00000000..6ecf4cd2 --- /dev/null +++ b/pinecone/db_control/resources/sync/index.py @@ -0,0 +1,192 @@ +import time +import logging +from typing import Optional, Dict, Union + +from pinecone.db_control.index_host_store import IndexHostStore + +from pinecone.db_control.models import ServerlessSpec, PodSpec, IndexModel, IndexList, IndexEmbed +from pinecone.utils import docslinks + +from 
pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, +) +from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict +from pinecone.db_control.request_factory import PineconeDBControlRequestFactory +from pinecone.core.openapi.db_control import API_VERSION + +logger = logging.getLogger(__name__) +""" @private """ + + +class IndexResource: + def __init__(self, index_api, config): + self._index_api = index_api + """ @private """ + + self._config = config + """ @private """ + + self._index_host_store = IndexHostStore() + """ @private """ + + def create( + self, + name: str, + spec: Union[Dict, ServerlessSpec, PodSpec], + dimension: Optional[int] = None, + metric: Optional[Union[Metric, str]] = Metric.COSINE, + timeout: Optional[int] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + tags: Optional[Dict[str, str]] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_request( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + resp = self._index_api.create_index(create_index_request=req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def create_for_model( + self, + name: str, + cloud: Union[CloudProvider, str], + region: Union[AwsRegion, GcpRegion, AzureRegion, str], + embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + timeout: Optional[int] = None, + ) -> IndexModel: + req = PineconeDBControlRequestFactory.create_index_for_model_request( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + ) + resp = self._index_api.create_index_for_model(req) + + if timeout == -1: + return IndexModel(resp) + return self.__poll_describe_index_until_ready(name, timeout) + + def __poll_describe_index_until_ready(self, name: str, timeout: Optional[int] = None): + description = None + + def is_ready() -> bool: + nonlocal description + description = self.describe(name=name) + return description.status.ready + + total_wait_time = 0 + if timeout is None: + # Wait indefinitely + while not is_ready(): + logger.debug( + f"Waiting for index {name} to be ready. Total wait time {total_wait_time} seconds." + ) + total_wait_time += 5 + time.sleep(5) + + else: + # Wait for a maximum of timeout seconds + while not is_ready(): + if timeout < 0: + logger.error(f"Index {name} is not ready. Timeout reached.") + link = docslinks["API_DESCRIBE_INDEX"](API_VERSION) + timeout_msg = ( + f"Please call describe_index() to confirm index status. See docs at {link}" + ) + raise TimeoutError(timeout_msg) + + logger.debug( + f"Waiting for index {name} to be ready. 
Total wait time: {total_wait_time}" + ) + total_wait_time += 5 + time.sleep(5) + timeout -= 5 + + return description + + def delete(self, name: str, timeout: Optional[int] = None): + self._index_api.delete_index(name) + self._index_host_store.delete_host(self._config, name) + + if timeout == -1: + return + + if timeout is None: + while self.has(name): + time.sleep(5) + else: + while self.has(name) and timeout >= 0: + time.sleep(5) + timeout -= 5 + if timeout and timeout < 0: + raise ( + TimeoutError( + "Please call the list_indexes API ({}) to confirm if index is deleted".format( + "https://www.pinecone.io/docs/api/operation/list_indexes/" + ) + ) + ) + + def list(self) -> IndexList: + response = self._index_api.list_indexes() + return IndexList(response) + + def describe(self, name: str) -> IndexModel: + api_instance = self._index_api + description = api_instance.describe_index(name) + host = description.host + self._index_host_store.set_host(self._config, name, host) + + return IndexModel(description) + + def has(self, name: str) -> bool: + if name in self.list().names(): + return True + else: + return False + + def configure( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union[PodType, str]] = None, + deletion_protection: Optional[Union[DeletionProtection, str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + api_instance = self._index_api + description = self.describe(name=name) + + req = PineconeDBControlRequestFactory.configure_index_request( + description=description, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + api_instance.configure_index(name, configure_index_request=req) + + def _get_host(self, name: str) -> str: + """@private""" + return self._index_host_store.get_host( + api=self._index_api, config=self._config, index_name=name + ) diff --git a/pinecone/control/types/__init__.py b/pinecone/db_control/types/__init__.py similarity index 100% rename from pinecone/control/types/__init__.py rename to pinecone/db_control/types/__init__.py diff --git a/pinecone/control/types/create_index_for_model_embed.py b/pinecone/db_control/types/create_index_for_model_embed.py similarity index 72% rename from pinecone/control/types/create_index_for_model_embed.py rename to pinecone/db_control/types/create_index_for_model_embed.py index 123474a0..ab7e43ac 100644 --- a/pinecone/control/types/create_index_for_model_embed.py +++ b/pinecone/db_control/types/create_index_for_model_embed.py @@ -1,6 +1,6 @@ from typing import TypedDict, Dict, Union -from ...enums import Metric -from ...data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference import EmbedModel class CreateIndexForModelEmbedTypedDict(TypedDict): diff --git a/pinecone/db_data/__init__.py b/pinecone/db_data/__init__.py new file mode 100644 index 00000000..f2db9a63 --- /dev/null +++ b/pinecone/db_data/__init__.py @@ -0,0 +1,61 @@ +from .index import ( + Index as _Index, + FetchResponse, + QueryResponse, + DescribeIndexStatsResponse, + UpsertResponse, + SparseValues, + Vector, +) +from .dataclasses import * +from .import_error import ( + Index, + IndexClientInstantiationError, + Inference, + InferenceInstantiationError, +) +from .index_asyncio import * +from .errors import ( + VectorDictionaryMissingKeysError, + VectorDictionaryExcessKeysError, + VectorTupleLengthError, + SparseValuesTypeError, + SparseValuesMissingKeysError, + SparseValuesDictionaryExpectedError, + 
MetadataDictionaryExpectedError, +) + +from .features.bulk_import import ImportErrorMode + + +import warnings + + +def _get_deprecated_import(name, from_module, to_module): + warnings.warn( + f"The import of `{name}` from `{from_module}` has moved to `{to_module}`. " + f"Please update your imports from `from {from_module} import {name}` " + f"to `from {to_module} import {name}`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + ) + # Import from the new location + from pinecone.inference import ( + Inference as _Inference, + AsyncioInference as _AsyncioInference, + RerankModel, + EmbedModel, + ) + + return locals()[name] + + +moved = ["_Inference", "_AsyncioInference", "RerankModel", "EmbedModel"] + + +def __getattr__(name): + if name in locals(): + return locals()[name] + elif name in moved: + return _get_deprecated_import(name, "pinecone.data", "pinecone.inference") + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") diff --git a/pinecone/data/dataclasses/__init__.py b/pinecone/db_data/dataclasses/__init__.py similarity index 100% rename from pinecone/data/dataclasses/__init__.py rename to pinecone/db_data/dataclasses/__init__.py diff --git a/pinecone/data/dataclasses/fetch_response.py b/pinecone/db_data/dataclasses/fetch_response.py similarity index 100% rename from pinecone/data/dataclasses/fetch_response.py rename to pinecone/db_data/dataclasses/fetch_response.py diff --git a/pinecone/data/dataclasses/search_query.py b/pinecone/db_data/dataclasses/search_query.py similarity index 100% rename from pinecone/data/dataclasses/search_query.py rename to pinecone/db_data/dataclasses/search_query.py diff --git a/pinecone/data/dataclasses/search_query_vector.py b/pinecone/db_data/dataclasses/search_query_vector.py similarity index 100% rename from pinecone/data/dataclasses/search_query_vector.py rename to pinecone/db_data/dataclasses/search_query_vector.py diff --git a/pinecone/data/dataclasses/search_rerank.py b/pinecone/db_data/dataclasses/search_rerank.py similarity index 97% rename from pinecone/data/dataclasses/search_rerank.py rename to pinecone/db_data/dataclasses/search_rerank.py index 1b9534ba..0ac4ca4e 100644 --- a/pinecone/data/dataclasses/search_rerank.py +++ b/pinecone/db_data/dataclasses/search_rerank.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from typing import Optional, Dict, Any, List -from ..features.inference import RerankModel +from pinecone.inference import RerankModel @dataclass diff --git a/pinecone/data/dataclasses/sparse_values.py b/pinecone/db_data/dataclasses/sparse_values.py similarity index 100% rename from pinecone/data/dataclasses/sparse_values.py rename to pinecone/db_data/dataclasses/sparse_values.py diff --git a/pinecone/data/dataclasses/utils.py b/pinecone/db_data/dataclasses/utils.py similarity index 100% rename from pinecone/data/dataclasses/utils.py rename to pinecone/db_data/dataclasses/utils.py diff --git a/pinecone/data/dataclasses/vector.py b/pinecone/db_data/dataclasses/vector.py similarity index 100% rename from pinecone/data/dataclasses/vector.py rename to pinecone/db_data/dataclasses/vector.py diff --git a/pinecone/data/errors.py b/pinecone/db_data/errors.py similarity index 100% rename from pinecone/data/errors.py rename to pinecone/db_data/errors.py diff --git a/pinecone/data/features/bulk_import/__init__.py b/pinecone/db_data/features/bulk_import/__init__.py similarity index 100% rename from pinecone/data/features/bulk_import/__init__.py rename to 
pinecone/db_data/features/bulk_import/__init__.py diff --git a/pinecone/data/features/bulk_import/bulk_import.py b/pinecone/db_data/features/bulk_import/bulk_import.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import.py rename to pinecone/db_data/features/bulk_import/bulk_import.py diff --git a/pinecone/data/features/bulk_import/bulk_import_asyncio.py b/pinecone/db_data/features/bulk_import/bulk_import_asyncio.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_asyncio.py rename to pinecone/db_data/features/bulk_import/bulk_import_asyncio.py diff --git a/pinecone/data/features/bulk_import/bulk_import_request_factory.py b/pinecone/db_data/features/bulk_import/bulk_import_request_factory.py similarity index 100% rename from pinecone/data/features/bulk_import/bulk_import_request_factory.py rename to pinecone/db_data/features/bulk_import/bulk_import_request_factory.py diff --git a/pinecone/data/import_error.py b/pinecone/db_data/import_error.py similarity index 100% rename from pinecone/data/import_error.py rename to pinecone/db_data/import_error.py diff --git a/pinecone/data/index.py b/pinecone/db_data/index.py similarity index 90% rename from pinecone/data/index.py rename to pinecone/db_data/index.py index ebd5cecd..6c78b849 100644 --- a/pinecone/data/index.py +++ b/pinecone/db_data/index.py @@ -1,8 +1,8 @@ from pinecone.utils.tqdm import tqdm - +import warnings import logging import json -from typing import Union, List, Optional, Dict, Any, Literal +from typing import Union, List, Optional, Dict, Any, Literal, TYPE_CHECKING from pinecone.config import ConfigBuilder @@ -45,6 +45,9 @@ from concurrent.futures import as_completed +if TYPE_CHECKING: + from pinecone.config import Config, OpenApiConfiguration + logger = logging.getLogger(__name__) """ @private """ @@ -55,7 +58,7 @@ def parse_query_response(response: QueryResponse): return response -class Index(IndexInterface, ImportFeatureMixin, PluginAware): +class Index(PluginAware, IndexInterface, ImportFeatureMixin): """ A client for interacting with a Pinecone index via REST API. For improved performance, use the Pinecone GRPC index client. 
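
[Editor's note] The hunks that follow privatize `config`, `openapi_config`, and `pool_threads` on `Index` and re-expose the old names through properties that emit a `DeprecationWarning`. A minimal, self-contained sketch of that shim pattern is below; the class and attribute names are illustrative only, not the SDK's actual code:

```python
import warnings


class Client:
    """Illustrative stand-in for the SDK classes touched in this diff."""

    def __init__(self, pool_threads: int = 5) -> None:
        # New canonical storage: the underscore-prefixed private attribute.
        self._pool_threads = pool_threads

    @property
    def pool_threads(self) -> int:
        # The old public name keeps working, but warns so callers can
        # migrate before the alias is removed in a future release.
        warnings.warn(
            "The `pool_threads` property has been renamed to `_pool_threads`.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller's line
        )
        return self._pool_threads


client = Client()
assert client.pool_threads == 5  # still works; emits a DeprecationWarning
```

Passing `stacklevel=2` points the warning at the caller's attribute access rather than the property body, which is why the hunks below pass it consistently.
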
@@ -70,29 +73,29 @@ def __init__( openapi_config=None, **kwargs, ): - self.config = ConfigBuilder.build( + self._config = ConfigBuilder.build( api_key=api_key, host=host, additional_headers=additional_headers, **kwargs ) """ @private """ - self.openapi_config = ConfigBuilder.build_openapi_config(self.config, openapi_config) + self._openapi_config = ConfigBuilder.build_openapi_config(self._config, openapi_config) """ @private """ if pool_threads is None: - self.pool_threads = 5 * cpu_count() + self._pool_threads = 5 * cpu_count() """ @private """ else: - self.pool_threads = pool_threads + self._pool_threads = pool_threads """ @private """ if kwargs.get("connection_pool_maxsize", None): - self.openapi_config.connection_pool_maxsize = kwargs.get("connection_pool_maxsize") + self._openapi_config.connection_pool_maxsize = kwargs.get("connection_pool_maxsize") self._vector_api = setup_openapi_client( api_client_klass=ApiClient, api_klass=VectorOperationsApi, - config=self.config, - openapi_config=self.openapi_config, - pool_threads=pool_threads, + config=self._config, + openapi_config=self._openapi_config, + pool_threads=self._pool_threads, api_version=API_VERSION, ) @@ -101,9 +104,30 @@ def __init__( # Pass the same api_client to the ImportFeatureMixin super().__init__(api_client=self._api_client) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads + @property + def config(self) -> "Config": + """@private""" + return self._config + + @property + def openapi_config(self) -> "OpenApiConfiguration": + """@private""" + warnings.warn( + "The `openapi_config` property has been renamed to `_openapi_config`. It is considered private and should not be used directly. This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, + ) + return self._openapi_config + + @property + def pool_threads(self) -> int: + """@private""" + warnings.warn( + "The `pool_threads` property has been renamed to `_pool_threads`. It is considered private and should not be used directly. 
This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, ) + return self._pool_threads def _openapi_kwargs(self, kwargs: Dict[str, Any]) -> Dict[str, Any]: return filter_dict(kwargs, OPENAPI_ENDPOINT_PARAMS) diff --git a/pinecone/data/index_asyncio.py b/pinecone/db_data/index_asyncio.py similarity index 100% rename from pinecone/data/index_asyncio.py rename to pinecone/db_data/index_asyncio.py diff --git a/pinecone/data/index_asyncio_interface.py b/pinecone/db_data/index_asyncio_interface.py similarity index 100% rename from pinecone/data/index_asyncio_interface.py rename to pinecone/db_data/index_asyncio_interface.py diff --git a/pinecone/data/interfaces.py b/pinecone/db_data/interfaces.py similarity index 100% rename from pinecone/data/interfaces.py rename to pinecone/db_data/interfaces.py diff --git a/pinecone/db_data/models/__init__.py b/pinecone/db_data/models/__init__.py new file mode 100644 index 00000000..a14d3600 --- /dev/null +++ b/pinecone/db_data/models/__init__.py @@ -0,0 +1 @@ +from pinecone.core.openapi.db_data.models import * diff --git a/pinecone/data/query_results_aggregator.py b/pinecone/db_data/query_results_aggregator.py similarity index 100% rename from pinecone/data/query_results_aggregator.py rename to pinecone/db_data/query_results_aggregator.py diff --git a/pinecone/data/request_factory.py b/pinecone/db_data/request_factory.py similarity index 100% rename from pinecone/data/request_factory.py rename to pinecone/db_data/request_factory.py diff --git a/pinecone/data/sparse_values_factory.py b/pinecone/db_data/sparse_values_factory.py similarity index 100% rename from pinecone/data/sparse_values_factory.py rename to pinecone/db_data/sparse_values_factory.py diff --git a/pinecone/data/types/__init__.py b/pinecone/db_data/types/__init__.py similarity index 100% rename from pinecone/data/types/__init__.py rename to pinecone/db_data/types/__init__.py diff --git a/pinecone/data/types/query_filter.py b/pinecone/db_data/types/query_filter.py similarity index 100% rename from pinecone/data/types/query_filter.py rename to pinecone/db_data/types/query_filter.py diff --git a/pinecone/data/types/search_query_typed_dict.py b/pinecone/db_data/types/search_query_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_typed_dict.py rename to pinecone/db_data/types/search_query_typed_dict.py diff --git a/pinecone/data/types/search_query_vector_typed_dict.py b/pinecone/db_data/types/search_query_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/search_query_vector_typed_dict.py rename to pinecone/db_data/types/search_query_vector_typed_dict.py diff --git a/pinecone/data/types/search_rerank_typed_dict.py b/pinecone/db_data/types/search_rerank_typed_dict.py similarity index 96% rename from pinecone/data/types/search_rerank_typed_dict.py rename to pinecone/db_data/types/search_rerank_typed_dict.py index 89c4f8d8..2d04fe82 100644 --- a/pinecone/data/types/search_rerank_typed_dict.py +++ b/pinecone/db_data/types/search_rerank_typed_dict.py @@ -1,5 +1,5 @@ from typing import TypedDict, Optional, Union, Dict, Any -from ..features.inference import RerankModel +from pinecone.inference import RerankModel class SearchRerankTypedDict(TypedDict): diff --git a/pinecone/data/types/sparse_vector_typed_dict.py b/pinecone/db_data/types/sparse_vector_typed_dict.py similarity index 100% rename from pinecone/data/types/sparse_vector_typed_dict.py rename to 
pinecone/db_data/types/sparse_vector_typed_dict.py diff --git a/pinecone/data/types/vector_metadata_dict.py b/pinecone/db_data/types/vector_metadata_dict.py similarity index 100% rename from pinecone/data/types/vector_metadata_dict.py rename to pinecone/db_data/types/vector_metadata_dict.py diff --git a/pinecone/data/types/vector_tuple.py b/pinecone/db_data/types/vector_tuple.py similarity index 100% rename from pinecone/data/types/vector_tuple.py rename to pinecone/db_data/types/vector_tuple.py diff --git a/pinecone/data/types/vector_typed_dict.py b/pinecone/db_data/types/vector_typed_dict.py similarity index 100% rename from pinecone/data/types/vector_typed_dict.py rename to pinecone/db_data/types/vector_typed_dict.py diff --git a/pinecone/data/vector_factory.py b/pinecone/db_data/vector_factory.py similarity index 100% rename from pinecone/data/vector_factory.py rename to pinecone/db_data/vector_factory.py diff --git a/pinecone/exceptions/__init__.py b/pinecone/exceptions/__init__.py index 92b05fd7..f437e90b 100644 --- a/pinecone/exceptions/__init__.py +++ b/pinecone/exceptions/__init__.py @@ -1,4 +1,7 @@ -from pinecone.openapi_support.exceptions import ( +from .exceptions import ( + PineconeConfigurationError, + PineconeProtocolError, + ListConversionException, PineconeException, PineconeApiAttributeError, PineconeApiTypeError, @@ -10,7 +13,6 @@ ForbiddenException, ServiceException, ) -from .exceptions import PineconeConfigurationError, PineconeProtocolError, ListConversionException __all__ = [ "PineconeConfigurationError", diff --git a/pinecone/exceptions/exceptions.py b/pinecone/exceptions/exceptions.py index 3860dc8b..32eed99f 100644 --- a/pinecone/exceptions/exceptions.py +++ b/pinecone/exceptions/exceptions.py @@ -1,4 +1,143 @@ -from pinecone.openapi_support.exceptions import PineconeException +class PineconeException(Exception): + """The base exception class for all exceptions in the Pinecone Python SDK""" + + +class PineconeApiTypeError(PineconeException, TypeError): + def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: + """Raises an exception for TypeErrors + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list): a list of keys and indices to get to the + current_item + None if unset + valid_classes (tuple): the primitive classes that current item + should be an instance of + None if unset + key_type (bool): False if our value is a value in a dict + True if it is a key in a dict + False if our item is an item in a list + None if unset + """ + self.path_to_item = path_to_item + self.valid_classes = valid_classes + self.key_type = key_type + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiTypeError, self).__init__(full_msg) + + +class PineconeApiValueError(PineconeException, ValueError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list) the path to the exception in the + received_data dict. None if unset + """ + + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiValueError, self).__init__(full_msg) + + +class PineconeApiAttributeError(PineconeException, AttributeError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Raised when an attribute reference or assignment fails.
+ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiAttributeError, self).__init__(full_msg) + + +class PineconeApiKeyError(PineconeException, KeyError): + def __init__(self, msg, path_to_item=None) -> None: + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(PineconeApiKeyError, self).__init__(full_msg) + + +class PineconeApiException(PineconeException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format(self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message + + +class NotFoundException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(NotFoundException, self).__init__(status, reason, http_resp) + + +class UnauthorizedException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(UnauthorizedException, self).__init__(status, reason, http_resp) + + +class ForbiddenException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ForbiddenException, self).__init__(status, reason, http_resp) + + +class ServiceException(PineconeApiException): + def __init__(self, status=None, reason=None, http_resp=None) -> None: + super(ServiceException, self).__init__(status, reason, http_resp) + + +def render_path(path_to_item): + """Returns a string representation of a path""" + result = "" + for pth in path_to_item: + if isinstance(pth, int): + result += "[{0}]".format(pth) + else: + result += "['{0}']".format(pth) + return result class PineconeProtocolError(PineconeException): diff --git a/pinecone/grpc/__init__.py b/pinecone/grpc/__init__.py index 350047ca..66adb916 100644 --- a/pinecone/grpc/__init__.py +++ b/pinecone/grpc/__init__.py @@ -49,7 +49,7 @@ from .config import GRPCClientConfig from .future import PineconeGrpcFuture -from pinecone.data.dataclasses import Vector, SparseValues +from pinecone.db_data.dataclasses import Vector, SparseValues from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( Vector as GRPCVector, diff --git a/pinecone/grpc/index_grpc.py b/pinecone/grpc/index_grpc.py index 9b68c0b6..bfaf8fff 100644 --- a/pinecone/grpc/index_grpc.py +++ b/pinecone/grpc/index_grpc.py @@ -21,7 +21,7 @@ QueryResponse, IndexDescription as DescribeIndexStatsResponse, ) -from pinecone.models.list_response import ListResponse as SimpleListResponse, Pagination +from pinecone.db_control.models.list_response import ListResponse as SimpleListResponse, Pagination from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( 
Vector as GRPCVector, QueryVector as GRPCQueryVector, @@ -38,11 +38,11 @@ SparseValues as GRPCSparseValues, ) from pinecone import Vector, SparseValues -from pinecone.data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator +from pinecone.db_data.query_results_aggregator import QueryNamespacesResults, QueryResultsAggregator from pinecone.core.grpc.protos.db_data_2025_01_pb2_grpc import VectorServiceStub from .base import GRPCIndexBase from .future import PineconeGrpcFuture -from ..data.types import ( +from ..db_data.types import ( SparseVectorTypedDict, VectorTypedDict, VectorTuple, diff --git a/pinecone/grpc/pinecone.py b/pinecone/grpc/pinecone.py index c78481ff..7c869e8f 100644 --- a/pinecone/grpc/pinecone.py +++ b/pinecone/grpc/pinecone.py @@ -1,5 +1,5 @@ -from ..control.pinecone import Pinecone -from ..config.config import ConfigBuilder +from pinecone import Pinecone +from pinecone.config import ConfigBuilder from .index_grpc import GRPCIndex @@ -122,15 +122,15 @@ def Index(self, name: str = "", host: str = "", **kwargs): raise ValueError("Either name or host must be specified") # Use host if it is provided, otherwise get host from describe_index - index_host = host or self.index_host_store.get_host(self.index_api, self.config, name) + index_host = host or self.db.index._get_host(name) - pt = kwargs.pop("pool_threads", None) or self.pool_threads + pt = kwargs.pop("pool_threads", None) or self._pool_threads config = ConfigBuilder.build( - api_key=self.config.api_key, + api_key=self._config.api_key, host=index_host, - source_tag=self.config.source_tag, - proxy_url=self.config.proxy_url, - ssl_ca_certs=self.config.ssl_ca_certs, + source_tag=self._config.source_tag, + proxy_url=self._config.proxy_url, + ssl_ca_certs=self._config.ssl_ca_certs, ) return GRPCIndex(index_name=name, config=config, pool_threads=pt, **kwargs) diff --git a/pinecone/grpc/sparse_values_factory.py b/pinecone/grpc/sparse_values_factory.py index 240cd8e1..5bb14685 100644 --- a/pinecone/grpc/sparse_values_factory.py +++ b/pinecone/grpc/sparse_values_factory.py @@ -3,8 +3,8 @@ from ..utils import convert_to_list -from ..data import SparseValuesTypeError, SparseValuesMissingKeysError -from ..data.types import SparseVectorTypedDict +from ..db_data import SparseValuesTypeError, SparseValuesMissingKeysError +from ..db_data.types import SparseVectorTypedDict from pinecone.core.grpc.protos.db_data_2025_01_pb2 import SparseValues as GRPCSparseValues from pinecone.core.openapi.db_data.models import SparseValues as OpenApiSparseValues diff --git a/pinecone/grpc/utils.py b/pinecone/grpc/utils.py index dcd19710..c2869e73 100644 --- a/pinecone/grpc/utils.py +++ b/pinecone/grpc/utils.py @@ -13,7 +13,7 @@ IndexDescription as DescribeIndexStatsResponse, NamespaceSummary, ) -from pinecone.data.dataclasses import FetchResponse +from pinecone.db_data.dataclasses import FetchResponse from google.protobuf.struct_pb2 import Struct diff --git a/pinecone/grpc/vector_factory_grpc.py b/pinecone/grpc/vector_factory_grpc.py index 1fe9572b..22efd269 100644 --- a/pinecone/grpc/vector_factory_grpc.py +++ b/pinecone/grpc/vector_factory_grpc.py @@ -8,13 +8,13 @@ from .utils import dict_to_proto_struct from ..utils import fix_tuple_length, convert_to_list from ..utils.constants import REQUIRED_VECTOR_FIELDS, OPTIONAL_VECTOR_FIELDS -from ..data import ( +from ..db_data import ( VectorDictionaryMissingKeysError, VectorDictionaryExcessKeysError, VectorTupleLengthError, MetadataDictionaryExpectedError, ) -from ..data.types import 
VectorTuple, VectorTypedDict +from ..db_data.types import VectorTuple, VectorTypedDict from .sparse_values_factory import SparseValuesFactory from pinecone.core.grpc.protos.db_data_2025_01_pb2 import ( diff --git a/pinecone/inference/__init__.py b/pinecone/inference/__init__.py new file mode 100644 index 00000000..30e93330 --- /dev/null +++ b/pinecone/inference/__init__.py @@ -0,0 +1,6 @@ +from .repl_overrides import install_repl_overrides +from .inference import Inference +from .inference_asyncio import AsyncioInference +from .inference_request_builder import RerankModel, EmbedModel + +install_repl_overrides() diff --git a/pinecone/data/features/inference/inference.py b/pinecone/inference/inference.py similarity index 80% rename from pinecone/data/features/inference/inference.py rename to pinecone/inference/inference.py index 71ada564..62e6cbcd 100644 --- a/pinecone/data/features/inference/inference.py +++ b/pinecone/inference/inference.py @@ -1,5 +1,6 @@ import logging -from typing import Optional, Dict, List, Union, Any +import warnings +from typing import Optional, Dict, List, Union, Any, TYPE_CHECKING from pinecone.openapi_support import ApiClient from pinecone.core.openapi.inference.apis import InferenceApi @@ -7,13 +8,15 @@ from pinecone.core.openapi.inference import API_VERSION from pinecone.utils import setup_openapi_client, PluginAware - from .inference_request_builder import ( InferenceRequestBuilder, EmbedModel as EmbedModelEnum, RerankModel as RerankModelEnum, ) +if TYPE_CHECKING: + from pinecone.config import Config, OpenApiConfiguration + logger = logging.getLogger(__name__) """ @private """ @@ -44,14 +47,14 @@ class Inference(PluginAware): EmbedModel = EmbedModelEnum RerankModel = RerankModelEnum - def __init__(self, config, openapi_config, **kwargs) -> None: - self.config = config + def __init__(self, config: "Config", openapi_config: "OpenApiConfiguration", **kwargs) -> None: + self._config = config """ @private """ - self.openapi_config = openapi_config + self._openapi_config = openapi_config """ @private """ - self.pool_threads = kwargs.get("pool_threads", 1) + self._pool_threads = kwargs.get("pool_threads", 1) """ @private """ self.__inference_api = setup_openapi_client( @@ -59,13 +62,38 @@ def __init__(self, config, openapi_config, **kwargs) -> None: api_klass=InferenceApi, config=config, openapi_config=openapi_config, - pool_threads=kwargs.get("pool_threads", 1), + pool_threads=self._pool_threads, api_version=API_VERSION, ) - self.load_plugins( - config=self.config, openapi_config=self.openapi_config, pool_threads=self.pool_threads + super().__init__() # Initialize PluginAware + + @property + def config(self) -> "Config": + """@private""" + # The config property is considered private, but the name cannot be changed to include underscore + # without breaking compatibility with plugins in the wild. + return self._config + + @property + def openapi_config(self) -> "OpenApiConfiguration": + """@private""" + warnings.warn( + "The `openapi_config` property has been renamed to `_openapi_config`. It is considered private and should not be used directly. This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, + ) + return self._openapi_config + + @property + def pool_threads(self) -> int: + """@private""" + warnings.warn( + "The `pool_threads` property has been renamed to `_pool_threads`. It is considered private and should not be used directly. 
This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, ) + return self._pool_threads def embed( self, diff --git a/pinecone/data/features/inference/inference_asyncio.py b/pinecone/inference/inference_asyncio.py similarity index 100% rename from pinecone/data/features/inference/inference_asyncio.py rename to pinecone/inference/inference_asyncio.py diff --git a/pinecone/data/features/inference/inference_request_builder.py b/pinecone/inference/inference_request_builder.py similarity index 100% rename from pinecone/data/features/inference/inference_request_builder.py rename to pinecone/inference/inference_request_builder.py diff --git a/pinecone/data/features/inference/models/__init__.py b/pinecone/inference/models/__init__.py similarity index 100% rename from pinecone/data/features/inference/models/__init__.py rename to pinecone/inference/models/__init__.py diff --git a/pinecone/data/features/inference/models/embedding_list.py b/pinecone/inference/models/embedding_list.py similarity index 100% rename from pinecone/data/features/inference/models/embedding_list.py rename to pinecone/inference/models/embedding_list.py diff --git a/pinecone/models/index_embed.py b/pinecone/inference/models/index_embed.py similarity index 94% rename from pinecone/models/index_embed.py rename to pinecone/inference/models/index_embed.py index 4d1ccfe3..4c3306d0 100644 --- a/pinecone/models/index_embed.py +++ b/pinecone/inference/models/index_embed.py @@ -1,8 +1,8 @@ from dataclasses import dataclass from typing import Optional, Dict, Any, Union -from ..enums import Metric -from ..data.features.inference import EmbedModel +from pinecone.db_control.enums import Metric +from pinecone.inference.inference_request_builder import EmbedModel @dataclass(frozen=True) diff --git a/pinecone/data/features/inference/models/rerank_result.py b/pinecone/inference/models/rerank_result.py similarity index 100% rename from pinecone/data/features/inference/models/rerank_result.py rename to pinecone/inference/models/rerank_result.py diff --git a/pinecone/data/features/inference/repl_overrides.py b/pinecone/inference/repl_overrides.py similarity index 100% rename from pinecone/data/features/inference/repl_overrides.py rename to pinecone/inference/repl_overrides.py diff --git a/pinecone/control/langchain_import_warnings.py b/pinecone/langchain_import_warnings.py similarity index 100% rename from pinecone/control/langchain_import_warnings.py rename to pinecone/langchain_import_warnings.py diff --git a/pinecone/control/pinecone_interface.py b/pinecone/legacy_pinecone_interface.py similarity index 94% rename from pinecone/control/pinecone_interface.py rename to pinecone/legacy_pinecone_interface.py index c183e611..0b097261 100644 --- a/pinecone/control/pinecone_interface.py +++ b/pinecone/legacy_pinecone_interface.py @@ -1,30 +1,30 @@ from abc import ABC, abstractmethod -from typing import Optional, Dict, Union - - -from pinecone.models import ( - ServerlessSpec, - PodSpec, - IndexList, - CollectionList, - IndexModel, - IndexEmbed, -) -from pinecone.enums import ( - Metric, - VectorType, - DeletionProtection, - PodType, - CloudProvider, - AwsRegion, - GcpRegion, - AzureRegion, -) -from .types import CreateIndexForModelEmbedTypedDict - - -class PineconeDBControlInterface(ABC): +from typing import Optional, Dict, Union, TYPE_CHECKING + +if TYPE_CHECKING: + from pinecone.db_control.models import ( + ServerlessSpec, + PodSpec, + IndexList, + CollectionList, + IndexModel, + 
IndexEmbed, + ) + from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + + +class LegacyPineconeDBControlInterface(ABC): @abstractmethod def __init__( self, @@ -190,14 +190,16 @@ def __init__( def create_index( self, name: str, - spec: Union[Dict, ServerlessSpec, PodSpec], + spec: Union[Dict, "ServerlessSpec", "PodSpec"], dimension: Optional[int], - metric: Optional[Union[Metric, str]] = Metric.COSINE, + metric: Optional[Union["Metric", str]] = "Metric.COSINE", timeout: Optional[int] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, - vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", + vector_type: Optional[Union["VectorType", str]] = "VectorType.DENSE", tags: Optional[Dict[str, str]] = None, - ) -> IndexModel: + ) -> "IndexModel": """Creates a Pinecone index. :param name: The name of the index to create. Must be unique within your project and @@ -299,13 +301,15 @@ def create_index( def create_index_for_model( self, name: str, - cloud: Union[CloudProvider, str], - region: Union[AwsRegion, GcpRegion, AzureRegion, str], - embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict], + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], tags: Optional[Dict[str, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED, + deletion_protection: Optional[ + Union["DeletionProtection", str] + ] = "DeletionProtection.DISABLED", timeout: Optional[int] = None, - ) -> IndexModel: + ) -> "IndexModel": """ :param name: The name of the index to create. Must be unique within your project and cannot be changed once created. Allowed characters are lowercase letters, numbers, @@ -414,7 +418,7 @@ def delete_index(self, name: str, timeout: Optional[int] = None): pass @abstractmethod - def list_indexes(self) -> IndexList: + def list_indexes(self) -> "IndexList": """ :return: Returns an `IndexList` object, which is iterable and contains a list of `IndexModel` objects. The `IndexList` also has a convenience method `names()` @@ -447,7 +451,7 @@ def list_indexes(self) -> IndexList: pass @abstractmethod - def describe_index(self, name: str) -> IndexModel: + def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. 
:return: Returns an `IndexModel` object @@ -534,8 +538,8 @@ def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -622,7 +626,7 @@ def configure_index( pass @abstractmethod - def create_collection(self, name: str, source: str): + def create_collection(self, name: str, source: str) -> None: """Create a collection from a pod-based index :param name: Name of the collection @@ -631,7 +635,7 @@ def create_collection(self, name: str, source: str): pass @abstractmethod - def list_collections(self) -> CollectionList: + def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/models/__init__.py b/pinecone/models/__init__.py index 86306c1e..fb94ddf5 100644 --- a/pinecone/models/__init__.py +++ b/pinecone/models/__init__.py @@ -1,20 +1,9 @@ -from .index_description import ServerlessSpecDefinition, PodSpecDefinition -from .collection_description import CollectionDescription -from .serverless_spec import ServerlessSpec -from .pod_spec import PodSpec -from .index_list import IndexList -from .collection_list import CollectionList -from .index_model import IndexModel -from .index_embed import IndexEmbed +import warnings -__all__ = [ - "CollectionDescription", - "PodSpec", - "PodSpecDefinition", - "ServerlessSpec", - "ServerlessSpecDefinition", - "IndexList", - "CollectionList", - "IndexModel", - "IndexEmbed", -] +from pinecone.db_control.models import * + +warnings.warn( + "The module at `pinecone.models` has moved to `pinecone.db_control.models`. " + "This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, +) diff --git a/pinecone/openapi_support/__init__.py b/pinecone/openapi_support/__init__.py index 63e3fb0a..890c3007 100644 --- a/pinecone/openapi_support/__init__.py +++ b/pinecone/openapi_support/__init__.py @@ -8,7 +8,7 @@ from .endpoint_utils import ExtraOpenApiKwargsTypedDict, KwargsWithOpenApiKwargDefaultsTypedDict from .asyncio_api_client import AsyncioApiClient from .asyncio_endpoint import AsyncioEndpoint -from .configuration import Configuration +from .configuration_lazy import Configuration from .exceptions import ( PineconeException, PineconeApiAttributeError, diff --git a/pinecone/openapi_support/api_client.py b/pinecone/openapi_support/api_client.py index 421d56cc..ee1e4649 100644 --- a/pinecone/openapi_support/api_client.py +++ b/pinecone/openapi_support/api_client.py @@ -1,14 +1,14 @@ import atexit -from multiprocessing.pool import ThreadPool -from concurrent.futures import ThreadPoolExecutor import io -from typing import Optional, List, Tuple, Dict, Any, Union -from .deserializer import Deserializer +from typing import Optional, List, Tuple, Dict, Any, Union, TYPE_CHECKING +if TYPE_CHECKING: + from multiprocessing.pool import ThreadPool + from concurrent.futures import ThreadPoolExecutor from .rest_urllib3 import Urllib3RestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, @@ -30,8 +30,8 @@ class ApiClient(object): to the API. More threads means more concurrent API requests. 
""" - _pool: Optional[ThreadPool] = None - _threadpool_executor: Optional[ThreadPoolExecutor] = None + _pool: Optional["ThreadPool"] = None + _threadpool_executor: Optional["ThreadPoolExecutor"] = None def __init__( self, configuration: Optional[Configuration] = None, pool_threads: Optional[int] = 1 @@ -64,18 +64,22 @@ def close(self): atexit.unregister(self.close) @property - def pool(self): + def pool(self) -> "ThreadPool": """Create thread pool on first request avoids instantiating unused threadpool for blocking clients. """ if self._pool is None: + from multiprocessing.pool import ThreadPool + atexit.register(self.close) self._pool = ThreadPool(self.pool_threads) return self._pool @property - def threadpool_executor(self): + def threadpool_executor(self) -> "ThreadPoolExecutor": if self._threadpool_executor is None: + from concurrent.futures import ThreadPoolExecutor + self._threadpool_executor = ThreadPoolExecutor(max_workers=self.pool_threads) return self._threadpool_executor @@ -186,6 +190,8 @@ def __call_api( # deserialize response data if response_type: + from .deserializer import Deserializer + Deserializer.decode_response(response_type=response_type, response=response_data) return_data = Deserializer.deserialize( response=response_data, diff --git a/pinecone/openapi_support/asyncio_api_client.py b/pinecone/openapi_support/asyncio_api_client.py index 51f2e0ce..43c8e17b 100644 --- a/pinecone/openapi_support/asyncio_api_client.py +++ b/pinecone/openapi_support/asyncio_api_client.py @@ -7,7 +7,7 @@ from .rest_aiohttp import AiohttpRestClient -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .exceptions import PineconeApiValueError, PineconeApiException from .api_client_utils import ( parameters_to_tuples, diff --git a/pinecone/openapi_support/configuration.py b/pinecone/openapi_support/configuration.py index fb6d7d19..e69de29b 100644 --- a/pinecone/openapi_support/configuration.py +++ b/pinecone/openapi_support/configuration.py @@ -1,441 +0,0 @@ -import copy -import logging -import multiprocessing - -from http import client as http_client -from .exceptions import PineconeApiValueError -from typing import TypedDict - - -class HostSetting(TypedDict): - url: str - description: str - - -JSON_SCHEMA_VALIDATION_KEYWORDS = { - "multipleOf", - "maximum", - "exclusiveMaximum", - "minimum", - "exclusiveMinimum", - "maxLength", - "minLength", - "pattern", - "maxItems", - "minItems", -} - - -class Configuration: - """Class to hold the configuration of the API client. - - :param host: Base url - :param api_key: Dict to store API key(s). - Each entry in the dict specifies an API key. - The dict key is the name of the security scheme in the OAS specification. - The dict value is the API key secret. - :param api_key_prefix: Dict to store API prefix (e.g. Bearer) - The dict key is the name of the security scheme in the OAS specification. - The dict value is an API key prefix when generating the auth data. - :param discard_unknown_keys: Boolean value indicating whether to discard - unknown properties. A server may send a response that includes additional - properties that are not known by the client in the following scenarios: - 1. The OpenAPI document is incomplete, i.e. it does not match the server - implementation. - 2. The client was generated using an older version of the OpenAPI document - and the server has been upgraded since then. 
- If a schema in the OpenAPI document defines the additionalProperties attribute, - then all undeclared properties received by the server are injected into the - additional properties map. In that case, there are undeclared properties, and - nothing to discard. - :param disabled_client_side_validations (string): Comma-separated list of - JSON schema validation keywords to disable JSON schema structural validation - rules. The following keywords may be specified: multipleOf, maximum, - exclusiveMaximum, minimum, exclusiveMinimum, maxLength, minLength, pattern, - maxItems, minItems. - By default, the validation is performed for data generated locally by the client - and data received from the server, independent of any validation performed by - the server side. If the input data does not satisfy the JSON schema validation - rules specified in the OpenAPI document, an exception is raised. - If disabled_client_side_validations is set, structural validation is - disabled. This can be useful to troubleshoot data validation problem, such as - when the OpenAPI document validation rules do not match the actual API data - received by the server. - :param server_operation_index: Mapping from operation ID to an index to server - configuration. - :param server_operation_variables: Mapping from operation ID to a mapping with - string values to replace variables in templated server configuration. - The validation of enums is performed for variables with defined enum values before. - :param ssl_ca_cert: str - the path to a file of concatenated CA certificates - in PEM format - - :Example: - - API Key Authentication Example. - Given the following security scheme in the OpenAPI specification: - components: - securitySchemes: - cookieAuth: # name for the security scheme - type: apiKey - in: cookie - name: JSESSIONID # cookie name - - You can programmatically set the cookie: - - conf = pinecone.openapi_support.Configuration( - api_key={'cookieAuth': 'abc123'} - api_key_prefix={'cookieAuth': 'JSESSIONID'} - ) - - The following cookie will be added to the HTTP request: - Cookie: JSESSIONID abc123 - """ - - _default = None - - def __init__( - self, - host=None, - api_key=None, - api_key_prefix=None, - discard_unknown_keys=False, - disabled_client_side_validations="", - server_index=None, - server_variables=None, - server_operation_index=None, - server_operation_variables=None, - ssl_ca_cert=None, - ): - """Constructor""" - self._base_path = "https://api.pinecone.io" if host is None else host - """Default Base url - """ - self.server_index = 0 if server_index is None and host is None else server_index - self.server_operation_index = server_operation_index or {} - """Default server index - """ - self.server_variables = server_variables or {} - self.server_operation_variables = server_operation_variables or {} - """Default server variables - """ - self.temp_folder_path = None - """Temp file folder for downloading files - """ - # Authentication Settings - self.api_key = {} - if api_key: - self.api_key = api_key - """dict to store API key(s) - """ - self.api_key_prefix = {} - if api_key_prefix: - self.api_key_prefix = api_key_prefix - """dict to store API prefix (e.g. 
Bearer) - """ - self.refresh_api_key_hook = None - """function hook to refresh API key if expired - """ - self.discard_unknown_keys = discard_unknown_keys - self.disabled_client_side_validations = disabled_client_side_validations - self.logger = {} - """Logging Settings - """ - self.logger["package_logger"] = logging.getLogger("pinecone.openapi_support") - self.logger["urllib3_logger"] = logging.getLogger("urllib3") - self.logger_format = "%(asctime)s %(levelname)s %(message)s" - """Log format - """ - self.logger_stream_handler = None - """Log stream handler - """ - self.logger_file_handler = None - """Log file handler - """ - self.logger_file = None - """Debug file location - """ - self.debug = False - """Debug switch - """ - - self.verify_ssl = True - """SSL/TLS verification - Set this to false to skip verifying SSL certificate when calling API - from https server. - """ - self.ssl_ca_cert = ssl_ca_cert - """Set this to customize the certificate file to verify the peer. - """ - self.cert_file = None - """client certificate file - """ - self.key_file = None - """client key file - """ - self.assert_hostname = None - """Set this to True/False to enable/disable SSL hostname verification. - """ - - self.connection_pool_maxsize = multiprocessing.cpu_count() * 5 - """urllib3 connection pool's maximum number of connections saved - per pool. urllib3 uses 1 connection as default value, but this is - not the best value when you are making a lot of possibly parallel - requests to the same host, which is often the case here. - cpu_count * 5 is used as default value to increase performance. - """ - - self.proxy = None - """Proxy URL - """ - self.proxy_headers = None - """Proxy headers - """ - self.safe_chars_for_path_param = "" - """Safe chars for path_param - """ - self.retries = None - """Adding retries to override urllib3 default value 3 - """ - # Enable client side validation - self.client_side_validation = True - - # Options to pass down to the underlying urllib3 socket - self.socket_options = None - - def __deepcopy__(self, memo): - cls = self.__class__ - result = cls.__new__(cls) - memo[id(self)] = result - for k, v in self.__dict__.items(): - if k not in ("logger", "logger_file_handler"): - setattr(result, k, copy.deepcopy(v, memo)) - # shallow copy of loggers - result.logger = copy.copy(self.logger) - # use setters to configure loggers - result.logger_file = self.logger_file - result.debug = self.debug - return result - - def __setattr__(self, name, value): - object.__setattr__(self, name, value) - if name == "disabled_client_side_validations": - s = set(filter(None, value.split(","))) - for v in s: - if v not in JSON_SCHEMA_VALIDATION_KEYWORDS: - raise PineconeApiValueError("Invalid keyword: '{0}''".format(v)) - self._disabled_client_side_validations = s - - @classmethod - def set_default(cls, default): - """Set default instance of configuration. - - It stores default configuration, which can be - returned by get_default_copy method. - - :param default: object of Configuration - """ - cls._default = copy.deepcopy(default) - - @classmethod - def get_default_copy(cls): - """Return new instance of configuration. - - This method returns newly created, based on default constructor, - object of Configuration class or returns a copy of default - configuration passed by the set_default method. - - :return: The configuration object. - """ - if cls._default is not None: - return copy.deepcopy(cls._default) - return Configuration() - - @property - def logger_file(self): - """The logger file. 
- - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - return self.__logger_file - - @logger_file.setter - def logger_file(self, value): - """The logger file. - - If the logger_file is None, then add stream handler and remove file - handler. Otherwise, add file handler and remove stream handler. - - :param value: The logger_file path. - :type: str - """ - self.__logger_file = value - if self.__logger_file: - # If set logging file, - # then add file handler and remove stream handler. - self.logger_file_handler = logging.FileHandler(self.__logger_file) - self.logger_file_handler.setFormatter(self.logger_formatter) - for _, logger in self.logger.items(): - logger.addHandler(self.logger_file_handler) - - @property - def debug(self): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - return self.__debug - - @debug.setter - def debug(self, value): - """Debug status - - :param value: The debug status, True or False. - :type: bool - """ - self.__debug = value - if self.__debug: - # if debug status is True, turn on debug logging - for _, logger in self.logger.items(): - logger.setLevel(logging.DEBUG) - # turn on http_client debug - http_client.HTTPConnection.debuglevel = 1 - else: - # if debug status is False, turn off debug logging, - # setting log level to default `logging.WARNING` - for _, logger in self.logger.items(): - logger.setLevel(logging.WARNING) - # turn off http_client debug - http_client.HTTPConnection.debuglevel = 0 - - @property - def logger_format(self): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - return self.__logger_format - - @logger_format.setter - def logger_format(self, value): - """The logger format. - - The logger_formatter will be updated when sets logger_format. - - :param value: The format string. - :type: str - """ - self.__logger_format = value - self.logger_formatter = logging.Formatter(self.__logger_format) - - def get_api_key_with_prefix(self, identifier, alias=None): - """Gets API key (with prefix if set). - - :param identifier: The identifier of apiKey. - :param alias: The alternative identifier of apiKey. - :return: The token for api key authentication. - """ - if self.refresh_api_key_hook is not None: - self.refresh_api_key_hook(self) - key = self.api_key.get(identifier, self.api_key.get(alias) if alias is not None else None) - if key: - prefix = self.api_key_prefix.get(identifier) - if prefix: - return "%s %s" % (prefix, key) - else: - return key - - def auth_settings(self): - """Gets Auth Settings dict for api client. - - :return: The Auth Settings information dict. 
- """ - auth = {} - if "ApiKeyAuth" in self.api_key: - auth["ApiKeyAuth"] = { - "type": "api_key", - "in": "header", - "key": "Api-Key", - "value": self.get_api_key_with_prefix("ApiKeyAuth"), - } - return auth - - def get_host_settings(self): - """Gets an array of host settings - - :return: An array of host settings - """ - return [{"url": "https://api.pinecone.io", "description": "Production API endpoints"}] - - def get_host_from_settings(self, index, variables=None, servers=None): - """Gets host URL based on the index and variables - :param index: array index of the host settings - :param variables: hash of variable and the corresponding value - :param servers: an array of host settings or None - :return: URL based on host settings - """ - if index is None: - return self._base_path - - variables = {} if variables is None else variables - servers = self.get_host_settings() if servers is None else servers - - try: - server = servers[index] - except IndexError: - raise ValueError( - "Invalid index {0} when selecting the host settings. Must be less than {1}".format( - index, len(servers) - ) - ) - - url = server["url"] - - # go through variables and replace placeholders - for variable_name, variable in server.get("variables", {}).items(): - used_value = variables.get(variable_name, variable["default_value"]) - - if "enum_values" in variable and used_value not in variable["enum_values"]: - raise ValueError( - "The variable `{0}` in the host URL has invalid value {1}. Must be {2}.".format( - variable_name, variables[variable_name], variable["enum_values"] - ) - ) - - url = url.replace("{" + variable_name + "}", used_value) - - return url - - @property - def host(self): - """Return generated host.""" - return self.get_host_from_settings(self.server_index, variables=self.server_variables) - - @host.setter - def host(self, value): - """Fix base path.""" - self._base_path = value - self.server_index = None - - def __repr__(self): - attrs = [ - f"host={self.host}", - "api_key=***", - f"api_key_prefix={self.api_key_prefix}", - f"connection_pool_maxsize={self.connection_pool_maxsize}", - f"discard_unknown_keys={self.discard_unknown_keys}", - f"disabled_client_side_validations={self.disabled_client_side_validations}", - f"server_index={self.server_index}", - f"server_variables={self.server_variables}", - f"server_operation_index={self.server_operation_index}", - f"server_operation_variables={self.server_operation_variables}", - f"ssl_ca_cert={self.ssl_ca_cert}", - ] - return f"Configuration({', '.join(attrs)})" diff --git a/pinecone/openapi_support/configuration_lazy.py b/pinecone/openapi_support/configuration_lazy.py new file mode 100644 index 00000000..27e90a34 --- /dev/null +++ b/pinecone/openapi_support/configuration_lazy.py @@ -0,0 +1,7 @@ +""" +Lazy import for the Configuration class to avoid loading the entire openapi_support package. 
+""" + +from ..config.openapi_configuration import Configuration + +__all__ = ["Configuration"] diff --git a/pinecone/openapi_support/endpoint_utils.py b/pinecone/openapi_support/endpoint_utils.py index 13522e85..867232b6 100644 --- a/pinecone/openapi_support/endpoint_utils.py +++ b/pinecone/openapi_support/endpoint_utils.py @@ -2,7 +2,7 @@ from .exceptions import PineconeApiTypeError, PineconeApiValueError from typing import Optional, Dict, Tuple, TypedDict, List, Literal, Any from .types import PropertyValidationTypedDict -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .model_utils import validate_and_convert_types, check_allowed_values, check_validations diff --git a/pinecone/openapi_support/exceptions.py b/pinecone/openapi_support/exceptions.py index fcc37da3..c9fcc571 100644 --- a/pinecone/openapi_support/exceptions.py +++ b/pinecone/openapi_support/exceptions.py @@ -1,140 +1 @@ -class PineconeException(Exception): - """The base exception class for all exceptions in the Pinecone Python SDK""" - - -class PineconeApiTypeError(PineconeException, TypeError): - def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None) -> None: - """Raises an exception for TypeErrors - - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list): a list of keys an indices to get to the - current_item - None if unset - valid_classes (tuple): the primitive classes that current item - should be an instance of - None if unset - key_type (bool): False if our value is a value in a dict - True if it is a key in a dict - False if our item is an item in a list - None if unset - """ - self.path_to_item = path_to_item - self.valid_classes = valid_classes - self.key_type = key_type - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiTypeError, self).__init__(full_msg) - - -class PineconeApiValueError(PineconeException, ValueError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (list) the path to the exception in the - received_data dict. None if unset - """ - - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiValueError, self).__init__(full_msg) - - -class PineconeApiAttributeError(PineconeException, AttributeError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Raised when an attribute reference or assignment fails. 
- - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiAttributeError, self).__init__(full_msg) - - -class PineconeApiKeyError(PineconeException, KeyError): - def __init__(self, msg, path_to_item=None) -> None: - """ - Args: - msg (str): the exception message - - Keyword Args: - path_to_item (None/list) the path to the exception in the - received_data dict - """ - self.path_to_item = path_to_item - full_msg = msg - if path_to_item: - full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) - super(PineconeApiKeyError, self).__init__(full_msg) - - -class PineconeApiException(PineconeException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - if http_resp: - self.status = http_resp.status - self.reason = http_resp.reason - self.body = http_resp.data - self.headers = http_resp.getheaders() - else: - self.status = status - self.reason = reason - self.body = None - self.headers = None - - def __str__(self): - """Custom error messages for exception""" - error_message = "({0})\nReason: {1}\n".format(self.status, self.reason) - if self.headers: - error_message += "HTTP response headers: {0}\n".format(self.headers) - - if self.body: - error_message += "HTTP response body: {0}\n".format(self.body) - - return error_message - - -class NotFoundException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(NotFoundException, self).__init__(status, reason, http_resp) - - -class UnauthorizedException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(UnauthorizedException, self).__init__(status, reason, http_resp) - - -class ForbiddenException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ForbiddenException, self).__init__(status, reason, http_resp) - - -class ServiceException(PineconeApiException): - def __init__(self, status=None, reason=None, http_resp=None) -> None: - super(ServiceException, self).__init__(status, reason, http_resp) - - -def render_path(path_to_item): - """Returns a string representation of a path""" - result = "" - for pth in path_to_item: - if isinstance(pth, int): - result += "[{0}]".format(pth) - else: - result += "['{0}']".format(pth) - return result +from pinecone.exceptions import * # noqa: F403 diff --git a/pinecone/openapi_support/model_utils.py b/pinecone/openapi_support/model_utils.py index 4fc4cf0f..163f94b4 100644 --- a/pinecone/openapi_support/model_utils.py +++ b/pinecone/openapi_support/model_utils.py @@ -1,5 +1,4 @@ from datetime import date, datetime # noqa: F401 -from dateutil.parser import parse import inspect import io @@ -1149,6 +1148,8 @@ def deserialize_primitive(data, klass, path_to_item): additional_message = "" try: if klass in {datetime, date}: + from dateutil.parser import parse + additional_message = ( "If you need your parameter to have a fallback " "string value, please set its type as `type: {}` in your " diff --git a/pinecone/openapi_support/rest_aiohttp.py b/pinecone/openapi_support/rest_aiohttp.py index c7121a11..3cab099a 100644 --- a/pinecone/openapi_support/rest_aiohttp.py +++ b/pinecone/openapi_support/rest_aiohttp.py @@ -2,7 +2,7 @@ import certifi import json from .rest_utils import RestClientInterface, 
RESTResponse, raise_exceptions_or_return -from .configuration import Configuration +from ..config.openapi_configuration import Configuration class AiohttpRestClient(RestClientInterface): diff --git a/pinecone/openapi_support/rest_urllib3.py b/pinecone/openapi_support/rest_urllib3.py index 85d008da..0c1a1c5a 100644 --- a/pinecone/openapi_support/rest_urllib3.py +++ b/pinecone/openapi_support/rest_urllib3.py @@ -4,7 +4,7 @@ import os from typing import Optional from urllib.parse import urlencode, quote -from .configuration import Configuration +from ..config.openapi_configuration import Configuration from .rest_utils import raise_exceptions_or_return, RESTResponse, RestClientInterface import urllib3 diff --git a/pinecone/pinecone.py b/pinecone/pinecone.py new file mode 100644 index 00000000..e5bc112a --- /dev/null +++ b/pinecone/pinecone.py @@ -0,0 +1,327 @@ +import logging +from typing import Optional, Dict, Union, TYPE_CHECKING +from multiprocessing import cpu_count +import warnings + +from pinecone.config import PineconeConfig, ConfigBuilder + +from .legacy_pinecone_interface import LegacyPineconeDBControlInterface + +from pinecone.utils import normalize_host, PluginAware, docslinks +from .langchain_import_warnings import _build_langchain_attribute_error_message + +logger = logging.getLogger(__name__) +""" @private """ + +if TYPE_CHECKING: + from pinecone.config import Config, OpenApiConfiguration + from pinecone.db_data import ( + _Index as Index, + _Inference as Inference, + _IndexAsyncio as IndexAsyncio, + ) + from pinecone.db_control import DBControl + from pinecone.db_control.index_host_store import IndexHostStore + from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi + from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict + from pinecone.db_control.enums import ( + Metric, + VectorType, + DeletionProtection, + PodType, + CloudProvider, + AwsRegion, + GcpRegion, + AzureRegion, + ) + from pinecone.db_control.models import ( + ServerlessSpec, + PodSpec, + IndexModel, + IndexList, + CollectionList, + IndexEmbed, + ) + + +class Pinecone(PluginAware, LegacyPineconeDBControlInterface): + """ + A client for interacting with Pinecone APIs. + """ + + def __init__( + self, + api_key: Optional[str] = None, + host: Optional[str] = None, + proxy_url: Optional[str] = None, + proxy_headers: Optional[Dict[str, str]] = None, + ssl_ca_certs: Optional[str] = None, + ssl_verify: Optional[bool] = None, + additional_headers: Optional[Dict[str, str]] = {}, + pool_threads: Optional[int] = None, + **kwargs, + ): + for deprecated_kwarg in {"config", "openapi_config", "index_api"}: + if deprecated_kwarg in kwargs: + raise NotImplementedError( + f"Passing {deprecated_kwarg} is no longer supported. Please pass individual settings such as proxy_url, proxy_headers, ssl_ca_certs, and ssl_verify directly to the Pinecone constructor as keyword arguments. See the README at {docslinks['README']} for examples." 
+                )
+
+        self._config = PineconeConfig.build(
+            api_key=api_key,
+            host=host,
+            additional_headers=additional_headers,
+            proxy_url=proxy_url,
+            proxy_headers=proxy_headers,
+            ssl_ca_certs=ssl_ca_certs,
+            ssl_verify=ssl_verify,
+            **kwargs,
+        )
+        """ @private """
+
+        self._openapi_config = ConfigBuilder.build_openapi_config(self._config, **kwargs)
+        """ @private """
+
+        if pool_threads is None:
+            self._pool_threads = 5 * cpu_count()
+            """ @private """
+        else:
+            self._pool_threads = pool_threads
+            """ @private """
+
+        self._inference: Optional["Inference"] = None  # Lazy initialization
+        """ @private """
+
+        self._db_control: Optional["DBControl"] = None  # Lazy initialization
+        """ @private """
+
+        super().__init__()  # Initialize PluginAware
+
+    @property
+    def inference(self) -> "Inference":
+        """
+        Inference is a namespace where an instance of the `pinecone.db_data._Inference` class is lazily created and cached.
+        """
+        if self._inference is None:
+            from pinecone.db_data import _Inference
+
+            self._inference = _Inference(config=self._config, openapi_config=self._openapi_config)
+        return self._inference
+
+    @property
+    def db(self) -> "DBControl":
+        """
+        DBControl is a namespace where an instance of the `pinecone.db_control.DBControl` class is lazily created and cached.
+        """
+        if self._db_control is None:
+            from pinecone.db_control import DBControl
+
+            self._db_control = DBControl(
+                config=self._config,
+                openapi_config=self._openapi_config,
+                pool_threads=self._pool_threads,
+            )
+        return self._db_control
+
+    @property
+    def index_host_store(self) -> "IndexHostStore":
+        """@private"""
+        warnings.warn(
+            "The `index_host_store` property is deprecated. This warning will become an error in a future version of the Pinecone Python SDK.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.index._index_host_store
+
+    @property
+    def config(self) -> "Config":
+        """@private"""
+        # The config property is considered private, but the name cannot be changed to include underscore
+        # without breaking compatibility with plugins in the wild.
+        return self._config
+
+    @property
+    def openapi_config(self) -> "OpenApiConfiguration":
+        """@private"""
+        warnings.warn(
+            "The `openapi_config` property has been renamed to `_openapi_config`. It is considered private and should not be used directly. This warning will become an error in a future version of the Pinecone Python SDK.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._openapi_config
+
+    @property
+    def pool_threads(self) -> int:
+        """@private"""
+        warnings.warn(
+            "The `pool_threads` property has been renamed to `_pool_threads`. It is considered private and should not be used directly. This warning will become an error in a future version of the Pinecone Python SDK.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._pool_threads
+
+    @property
+    def index_api(self) -> "ManageIndexesApi":
+        """@private"""
+        warnings.warn(
+            "The `index_api` property is deprecated.
This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, + ) + return self.db._index_api + + def create_index( + self, + name: str, + spec: Union[Dict, "ServerlessSpec", "PodSpec"], + dimension: Optional[int] = None, + metric: Optional[Union["Metric", str]] = "cosine", + timeout: Optional[int] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled", + vector_type: Optional[Union["VectorType", str]] = "dense", + tags: Optional[Dict[str, str]] = None, + ) -> "IndexModel": + return self.db.index.create( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + timeout=timeout, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + ) + + def create_index_for_model( + self, + name: str, + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled", + timeout: Optional[int] = None, + ) -> "IndexModel": + return self.db.index.create_for_model( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + timeout=timeout, + ) + + def delete_index(self, name: str, timeout: Optional[int] = None): + return self.db.index.delete(name=name, timeout=timeout) + + def list_indexes(self) -> "IndexList": + return self.db.index.list() + + def describe_index(self, name: str) -> "IndexModel": + return self.db.index.describe(name=name) + + def has_index(self, name: str) -> bool: + return self.db.index.has(name=name) + + def configure_index( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + return self.db.index.configure( + name=name, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + + def create_collection(self, name: str, source: str) -> None: + return self.db.collection.create(name=name, source=source) + + def list_collections(self) -> "CollectionList": + return self.db.collection.list() + + def delete_collection(self, name: str) -> None: + return self.db.collection.delete(name=name) + + def describe_collection(self, name: str): + return self.db.collection.describe(name=name) + + @staticmethod + def from_texts(*args, **kwargs): + """@private""" + raise AttributeError(_build_langchain_attribute_error_message("from_texts")) + + @staticmethod + def from_documents(*args, **kwargs): + """@private""" + raise AttributeError(_build_langchain_attribute_error_message("from_documents")) + + def Index(self, name: str = "", host: str = "", **kwargs) -> "Index": + from pinecone.db_data import _Index + + if name == "" and host == "": + raise ValueError("Either name or host must be specified") + + pt = kwargs.pop("pool_threads", None) or self._pool_threads + api_key = self._config.api_key + openapi_config = self._openapi_config + + if host != "": + check_realistic_host(host) + + # Use host url if it is provided + index_host = normalize_host(host) + else: + # Otherwise, get host url from describe_index using the index name + index_host = self.db.index._get_host(name) + + return _Index( + host=index_host, + api_key=api_key, + pool_threads=pt, + openapi_config=openapi_config, + 
source_tag=self.config.source_tag,
+            **kwargs,
+        )
+
+    def IndexAsyncio(self, host: str, **kwargs) -> "IndexAsyncio":
+        from pinecone.db_data import _IndexAsyncio
+
+        api_key = self._config.api_key
+        openapi_config = self._openapi_config
+
+        if host is None or host == "":
+            raise ValueError("A host must be specified")
+
+        check_realistic_host(host)
+        index_host = normalize_host(host)
+
+        return _IndexAsyncio(
+            host=index_host,
+            api_key=api_key,
+            openapi_config=openapi_config,
+            source_tag=self.config.source_tag,
+            **kwargs,
+        )
+
+
+def check_realistic_host(host: str) -> None:
+    """@private
+
+    Checks whether a user-provided host string seems plausible.
+    Someone could erroneously pass an index name as the host by
+    mistake, and if they have done that we'd like to give them a
+    simple error message as feedback rather than attempting to
+    call the url and getting a more cryptic DNS resolution error.
+    """
+
+    if "." not in host and "localhost" not in host:
+        raise ValueError(
+            f"You passed '{host}' as the host but this does not appear to be valid. Call describe_index() to confirm the host of the index."
+        )
diff --git a/pinecone/pinecone_asyncio.py b/pinecone/pinecone_asyncio.py
new file mode 100644
index 00000000..278039e6
--- /dev/null
+++ b/pinecone/pinecone_asyncio.py
@@ -0,0 +1,292 @@
+import logging
+import warnings
+from typing import Optional, Dict, Union, TYPE_CHECKING
+
+from pinecone.config import PineconeConfig, ConfigBuilder
+
+from pinecone.utils import normalize_host
+from pinecone.utils import docslinks
+
+from .pinecone_interface_asyncio import PineconeAsyncioDBControlInterface
+from .pinecone import check_realistic_host
+
+if TYPE_CHECKING:
+    from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
+    from pinecone.db_data import _IndexAsyncio
+    from pinecone.db_control.enums import (
+        Metric,
+        VectorType,
+        DeletionProtection,
+        PodType,
+        CloudProvider,
+        AwsRegion,
+        GcpRegion,
+        AzureRegion,
+    )
+    from pinecone.db_control.models import (
+        ServerlessSpec,
+        PodSpec,
+        IndexModel,
+        IndexList,
+        CollectionList,
+        IndexEmbed,
+    )
+    from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
+    from pinecone.db_control.index_host_store import IndexHostStore
+
+logger = logging.getLogger(__name__)
+""" @private """
+
+
+class PineconeAsyncio(PineconeAsyncioDBControlInterface):
+    """
+    `PineconeAsyncio` is an asyncio client for interacting with Pinecone's control plane API.
+
+    This class implements methods for managing and interacting with Pinecone resources
+    such as collections and indexes.
+
+    To perform data operations such as inserting and querying vectors, use the `IndexAsyncio` class.
+
+    ```python
+    import asyncio
+    from pinecone import PineconeAsyncio
+
+    async def main():
+        pc = PineconeAsyncio()
+        async with pc.IndexAsyncio(host="my-index.pinecone.io") as idx:
+            await idx.upsert(vectors=[("1", [1.0, 2.0, 3.0]), ("2", [4.0, 5.0, 6.0])])
+        await pc.close()
+
+    asyncio.run(main())
+    ```
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        host: Optional[str] = None,
+        proxy_url: Optional[str] = None,
+        # proxy_headers: Optional[Dict[str, str]] = None,
+        ssl_ca_certs: Optional[str] = None,
+        ssl_verify: Optional[bool] = None,
+        additional_headers: Optional[Dict[str, str]] = {},
+        **kwargs,
+    ):
+        for deprecated_kwarg in {"config", "openapi_config"}:
+            if deprecated_kwarg in kwargs:
+                raise NotImplementedError(
+                    f"Passing {deprecated_kwarg} is no longer supported.
Please pass individual settings such as proxy_url, ssl_ca_certs, and ssl_verify directly to the PineconeAsyncio constructor as keyword arguments. See the README at {docslinks['README']} for examples."
+                )
+
+        for unimplemented_kwarg in {"proxy_headers"}:
+            if unimplemented_kwarg in kwargs:
+                raise NotImplementedError(
+                    f"You have passed {unimplemented_kwarg} but this configuration has not been implemented for PineconeAsyncio."
+                )
+
+        self._config = PineconeConfig.build(
+            api_key=api_key,
+            host=host,
+            additional_headers=additional_headers,
+            proxy_url=proxy_url,
+            proxy_headers=None,
+            ssl_ca_certs=ssl_ca_certs,
+            ssl_verify=ssl_verify,
+            **kwargs,
+        )
+        """ @private """
+
+        self._openapi_config = ConfigBuilder.build_openapi_config(self._config, **kwargs)
+        """ @private """
+
+        self._inference = None  # Lazy initialization
+        """ @private """
+
+        self._db_control = None  # Lazy initialization
+        """ @private """
+
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+    async def close(self):
+        """Clean up resources used by the PineconeAsyncio client.
+
+        This method should be called when the client is no longer needed so that
+        it can clean up the aiohttp session and other resources.
+
+        After close has been called, the client instance should not be used.
+
+        ```python
+        import asyncio
+        from pinecone import PineconeAsyncio
+
+        async def main():
+            pc = PineconeAsyncio()
+            desc = await pc.describe_index(name="my-index")
+            await pc.close()
+
+        asyncio.run(main())
+        ```
+
+        If you are using the client as a context manager, the close method is called automatically
+        when exiting.
+
+        ```python
+        import asyncio
+        from pinecone import PineconeAsyncio
+
+        async def main():
+            async with PineconeAsyncio() as pc:
+                desc = await pc.describe_index(name="my-index")
+
+        # No need to call close in this case because the "async with" syntax
+        # automatically calls close when exiting the block.
+        asyncio.run(main())
+        ```
+
+        """
+        await self.db._index_api.api_client.close()
+
+    @property
+    def inference(self):
+        """Dynamically create and cache the AsyncioInference instance."""
+        if self._inference is None:
+            from pinecone.db_data import _AsyncioInference
+
+            self._inference = _AsyncioInference(api_client=self.db._index_api.api_client)
+        return self._inference
+
+    @property
+    def db(self):
+        if self._db_control is None:
+            from .db_control.db_control_asyncio import DBControlAsyncio
+
+            self._db_control = DBControlAsyncio(
+                config=self._config, openapi_config=self._openapi_config
+            )
+        return self._db_control
+
+    @property
+    def index_host_store(self) -> "IndexHostStore":
+        """@private"""
+        warnings.warn(
+            "The `index_host_store` property is deprecated. This warning will become an error in a future version of the Pinecone Python SDK.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.db.index._index_host_store
+
+    @property
+    def index_api(self) -> "ManageIndexesApi":
+        """@private"""
+        warnings.warn(
+            "The `index_api` property is deprecated.
This warning will become an error in a future version of the Pinecone Python SDK.", + DeprecationWarning, + stacklevel=2, + ) + return self.db._index_api + + async def create_index( + self, + name: str, + spec: Union[Dict, "ServerlessSpec", "PodSpec"], + dimension: Optional[int] = None, + metric: Optional[Union["Metric", str]] = "cosine", + timeout: Optional[int] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled", + vector_type: Optional[Union["VectorType", str]] = "dense", + tags: Optional[Dict[str, str]] = None, + ) -> "IndexModel": + resp = await self.db.index.create( + name=name, + spec=spec, + dimension=dimension, + metric=metric, + deletion_protection=deletion_protection, + vector_type=vector_type, + tags=tags, + timeout=timeout, + ) + return resp + + async def create_index_for_model( + self, + name: str, + cloud: Union["CloudProvider", str], + region: Union["AwsRegion", "GcpRegion", "AzureRegion", str], + embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"], + tags: Optional[Dict[str, str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = "disabled", + timeout: Optional[int] = None, + ) -> "IndexModel": + return await self.db.index.create_for_model( + name=name, + cloud=cloud, + region=region, + embed=embed, + tags=tags, + deletion_protection=deletion_protection, + timeout=timeout, + ) + + async def delete_index(self, name: str, timeout: Optional[int] = None): + return await self.db.index.delete(name=name, timeout=timeout) + + async def list_indexes(self) -> "IndexList": + return await self.db.index.list() + + async def describe_index(self, name: str) -> "IndexModel": + return await self.db.index.describe(name=name) + + async def has_index(self, name: str) -> bool: + return await self.db.index.has(name=name) + + async def configure_index( + self, + name: str, + replicas: Optional[int] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, + tags: Optional[Dict[str, str]] = None, + ): + return await self.db.index.configure( + name=name, + replicas=replicas, + pod_type=pod_type, + deletion_protection=deletion_protection, + tags=tags, + ) + + async def create_collection(self, name: str, source: str): + return await self.db.collection.create(name=name, source=source) + + async def list_collections(self) -> "CollectionList": + return await self.db.collection.list() + + async def delete_collection(self, name: str): + return await self.db.collection.delete(name=name) + + async def describe_collection(self, name: str): + return await self.db.collection.describe(name=name) + + def IndexAsyncio(self, host: str, **kwargs) -> "_IndexAsyncio": + from pinecone.db_data import _IndexAsyncio + + api_key = self._config.api_key + openapi_config = self._openapi_config + + if host is None or host == "": + raise ValueError("A host must be specified") + + check_realistic_host(host) + index_host = normalize_host(host) + + return _IndexAsyncio( + host=index_host, + api_key=api_key, + openapi_config=openapi_config, + source_tag=self._config.source_tag, + **kwargs, + ) diff --git a/pinecone/control/pinecone_interface_asyncio.py b/pinecone/pinecone_interface_asyncio.py similarity index 95% rename from pinecone/control/pinecone_interface_asyncio.py rename to pinecone/pinecone_interface_asyncio.py index a732bce9..31d1feba 100644 --- a/pinecone/control/pinecone_interface_asyncio.py +++ b/pinecone/pinecone_interface_asyncio.py @@ -1,32 +1,31 @@ from abc import 
ABC, abstractmethod
-from typing import Optional, Dict, Union
-
-
-from pinecone.config import Config
-
-from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
-
-
-from pinecone.models import (
-    ServerlessSpec,
-    PodSpec,
-    IndexList,
-    CollectionList,
-    IndexModel,
-    IndexEmbed,
-)
-from pinecone.enums import (
-    Metric,
-    VectorType,
-    DeletionProtection,
-    PodType,
-    CloudProvider,
-    AwsRegion,
-    GcpRegion,
-    AzureRegion,
-)
-from .types import CreateIndexForModelEmbedTypedDict
+from typing import Optional, Dict, Union, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pinecone.config import Config
+
+    from pinecone.core.openapi.db_control.api.manage_indexes_api import ManageIndexesApi
+
+    from pinecone.db_control.models import (
+        ServerlessSpec,
+        PodSpec,
+        IndexList,
+        CollectionList,
+        IndexModel,
+        IndexEmbed,
+    )
+    from pinecone.db_control.enums import (
+        Metric,
+        VectorType,
+        DeletionProtection,
+        PodType,
+        CloudProvider,
+        AwsRegion,
+        GcpRegion,
+        AzureRegion,
+    )
+    from pinecone.db_control.types import CreateIndexForModelEmbedTypedDict
 class PineconeAsyncioDBControlInterface(ABC):
@@ -39,10 +38,10 @@ def __init__(
         proxy_headers: Optional[Dict[str, str]] = None,
         ssl_ca_certs: Optional[str] = None,
         ssl_verify: Optional[bool] = None,
-        config: Optional[Config] = None,
+        config: Optional["Config"] = None,
         additional_headers: Optional[Dict[str, str]] = {},
         pool_threads: Optional[int] = 1,
-        index_api: Optional[ManageIndexesApi] = None,
+        index_api: Optional["ManageIndexesApi"] = None,
         **kwargs,
     ):
         """
@@ -291,12 +290,14 @@ async def main():
     async def create_index(
         self,
         name: str,
-        spec: Union[Dict, ServerlessSpec, PodSpec],
+        spec: Union[Dict, "ServerlessSpec", "PodSpec"],
         dimension: Optional[int],
-        metric: Optional[Union[Metric, str]] = Metric.COSINE,
+        metric: Optional[Union["Metric", str]] = "cosine",
        timeout: Optional[int] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
-        vector_type: Optional[Union[VectorType, str]] = VectorType.DENSE,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
+        vector_type: Optional[Union["VectorType", str]] = "dense",
         tags: Optional[Dict[str, str]] = None,
     ):
         """Creates a Pinecone index.
@@ -408,13 +409,15 @@ async def main():
     async def create_index_for_model(
         self,
         name: str,
-        cloud: Union[CloudProvider, str],
-        region: Union[AwsRegion, GcpRegion, AzureRegion, str],
-        embed: Union[IndexEmbed, CreateIndexForModelEmbedTypedDict],
+        cloud: Union["CloudProvider", str],
+        region: Union["AwsRegion", "GcpRegion", "AzureRegion", str],
+        embed: Union["IndexEmbed", "CreateIndexForModelEmbedTypedDict"],
         tags: Optional[Dict[str, str]] = None,
-        deletion_protection: Optional[Union[DeletionProtection, str]] = DeletionProtection.DISABLED,
+        deletion_protection: Optional[
+            Union["DeletionProtection", str]
+        ] = "disabled",
         timeout: Optional[int] = None,
-    ) -> IndexModel:
+    ) -> "IndexModel":
         """
         :param name: The name of the index to create. Must be unique within your project and
             cannot be changed once created. Allowed characters are lowercase letters, numbers,
@@ -533,7 +536,7 @@ async def main():
         pass
     @abstractmethod
-    async def list_indexes(self) -> IndexList:
+    async def list_indexes(self) -> "IndexList":
         """
         :return: Returns an `IndexList` object, which is iterable and contains a
             list of `IndexModel` objects.
The `IndexList` also has a convenience method `names()` @@ -574,7 +577,7 @@ async def main(): pass @abstractmethod - async def describe_index(self, name: str) -> IndexModel: + async def describe_index(self, name: str) -> "IndexModel": """ :param name: the name of the index to describe. :return: Returns an `IndexModel` object @@ -669,8 +672,8 @@ async def configure_index( self, name: str, replicas: Optional[int] = None, - pod_type: Optional[Union[PodType, str]] = None, - deletion_protection: Optional[Union[DeletionProtection, str]] = None, + pod_type: Optional[Union["PodType", str]] = None, + deletion_protection: Optional[Union["DeletionProtection", str]] = None, tags: Optional[Dict[str, str]] = None, ): """ @@ -779,7 +782,7 @@ async def create_collection(self, name: str, source: str): pass @abstractmethod - async def list_collections(self) -> CollectionList: + async def list_collections(self) -> "CollectionList": """List all collections ```python diff --git a/pinecone/utils/docslinks.py b/pinecone/utils/docslinks.py index a86dd1da..cdfe66cd 100644 --- a/pinecone/utils/docslinks.py +++ b/pinecone/utils/docslinks.py @@ -1,10 +1,12 @@ -from pinecone.core.openapi.db_control import API_VERSION +def versioned_url(template: str): + return lambda version: template.format(version) + docslinks = { "README": "https://github.com/pinecone-io/pinecone-python-client/blob/main/README.md", "GITHUB_REPO": "https://github.com/pinecone-io/pinecone-python-client", "LANGCHAIN_IMPORT_KB_ARTICLE": "https://docs.pinecone.io/troubleshooting/pinecone-attribute-errors-with-langchain", - "API_DESCRIBE_INDEX": "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index".format( - API_VERSION + "API_DESCRIBE_INDEX": versioned_url( + "https://docs.pinecone.io/reference/api/{}/control-plane/describe_index" ), } diff --git a/pinecone/utils/find_legacy_imports.py b/pinecone/utils/find_legacy_imports.py new file mode 100755 index 00000000..5421de28 --- /dev/null +++ b/pinecone/utils/find_legacy_imports.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Script to identify legacy imports that were previously available via star imports. + +This script analyzes the codebase to find all imports that were previously available +via star imports but are no longer imported at the top level. +""" + +import ast +import os +from typing import Set + + +def find_star_imports(file_path: str) -> Set[str]: + """ + Find all star imports in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of module names that are imported with star imports. + """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + star_imports = set() + + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.names[0].name == "*": + module_name = node.module + if module_name: + star_imports.add(module_name) + + return star_imports + + +def find_imported_names(file_path: str) -> Set[str]: + """ + Find all names that are imported in a file. + + Args: + file_path: Path to the file to analyze. + + Returns: + Set of imported names. 
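+
+    Example (hypothetical file contents, to make the return value concrete):
+    a module containing `import os` and `from typing import Dict` yields
+    {"os", "Dict"}; plain imports contribute the module name, while `from`
+    imports contribute the bound names.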
+ """ + with open(file_path, "r") as f: + content = f.read() + + try: + tree = ast.parse(content) + except SyntaxError: + print(f"Warning: Could not parse {file_path}") + return set() + + imported_names = set() + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for name in node.names: + imported_names.add(name.name) + elif isinstance(node, ast.ImportFrom): + for name in node.names: + if name.name != "*": + imported_names.add(name.name) + + return imported_names + + +def find_module_exports(module_path: str) -> Set[str]: + """ + Find all names that are exported by a module. + + Args: + module_path: Path to the module to analyze. + + Returns: + Set of exported names. + """ + try: + module = __import__(module_path, fromlist=["*"]) + return set(dir(module)) + except ImportError: + print(f"Warning: Could not import {module_path}") + return set() + + +def main(): + """ + Main function to find legacy imports. + """ + # Get the package root directory + package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + # Find the __init__.py file + init_file = os.path.join(package_root, "__init__.py") + + # Find star imports in the __init__.py file + star_imports = find_star_imports(init_file) + + # Find all imported names in the __init__.py file + imported_names = find_imported_names(init_file) + + # Find all module exports + module_exports = {} + for module_name in star_imports: + module_exports[module_name] = find_module_exports(module_name) + + # Find all files in the package + package_files = [] + for root, _, files in os.walk(package_root): + for file in files: + if file.endswith(".py") and not file.startswith("__"): + package_files.append(os.path.join(root, file)) + + # Find all imports in the package + package_imports = set() + for file in package_files: + package_imports.update(find_imported_names(file)) + + # Find legacy imports + legacy_imports = {} + for module_name, exports in module_exports.items(): + for export in exports: + if export in package_imports and export not in imported_names: + legacy_imports[f"pinecone.{export}"] = (module_name, export) + + # Print the legacy imports + print("LEGACY_IMPORTS = {") + for legacy_name, (module_path, actual_name) in sorted(legacy_imports.items()): + print(f" '{legacy_name}': ('{module_path}', '{actual_name}'),") + print("}") + + +if __name__ == "__main__": + main() diff --git a/pinecone/utils/lazy_imports.py b/pinecone/utils/lazy_imports.py new file mode 100644 index 00000000..0a55c8f4 --- /dev/null +++ b/pinecone/utils/lazy_imports.py @@ -0,0 +1,76 @@ +""" +Lazy import handler for Pinecone. + +This module provides a way to lazily load imports that were previously +available via star imports but are no longer imported at the top level. 
+""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Tuple, cast + +# Dictionary mapping import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LAZY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your lazy imports here +} + + +class LazyModule: + def __init__(self, original_module, lazy_imports): + self._original_module = original_module + self._lazy_imports = lazy_imports + self._loaded_attrs = {} + + def __dir__(self): + # Get the base directory listing from the original module + base_dir = dir(self._original_module) + + # Add lazy-loaded items + lazy_dir = list(self._lazy_imports.keys()) + + # Return combined list + return sorted(set(base_dir + lazy_dir)) + + def __getattr__(self, name): + # First try the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Then try lazy imports + if name in self._lazy_imports: + if name not in self._loaded_attrs: + module_path, item_name = self._lazy_imports[name] + module = importlib.import_module(module_path) + self._loaded_attrs[name] = getattr(module, item_name) + return self._loaded_attrs[name] + + raise AttributeError(f"module '{self._original_module.__name__}' has no attribute '{name}'") + + +def setup_lazy_imports(lazy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the lazy import handler. + + Args: + lazy_imports: Optional dictionary of imports to handle lazily. + If None, uses the default LAZY_IMPORTS dictionary. + """ + if lazy_imports is None: + lazy_imports = LAZY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LazyModule(original_module, lazy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/pinecone/utils/legacy_imports.py b/pinecone/utils/legacy_imports.py new file mode 100644 index 00000000..9013acdd --- /dev/null +++ b/pinecone/utils/legacy_imports.py @@ -0,0 +1,112 @@ +""" +Legacy import handler for Pinecone. + +This module provides a simple way to handle legacy imports that were previously +available via star imports but are no longer imported at the top level. +""" + +import importlib +import sys +from types import ModuleType +from typing import Dict, Optional, Set, Any, Tuple, cast + +# Dictionary mapping legacy import names to their actual module paths +# Format: 'name': ('module_path', 'actual_name') +LEGACY_IMPORTS: Dict[str, Tuple[str, str]] = { + # Example: 'Vector': ('pinecone.db_data.models', 'Vector') + # Add all your legacy imports here +} + + +class LegacyImportProxy: + """ + A proxy module that handles legacy imports with warnings. + + This class is used to replace the pinecone module in sys.modules + to handle legacy imports that were previously available via star imports. + """ + + def __init__(self, original_module: Any, legacy_imports: Dict[str, Tuple[str, str]]): + """ + Initialize the proxy module. + + Args: + original_module: The original module to proxy. + legacy_imports: Dictionary of legacy imports to handle. 
+ """ + self._original_module = original_module + self._legacy_imports = legacy_imports + self._warned_imports: Set[str] = set() + self._loaded_modules: Dict[str, Any] = {} + + def __getattr__(self, name: str) -> Any: + """ + Handle attribute access for legacy imports. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found. + """ + # First, try to get the attribute from the original module + try: + return getattr(self._original_module, name) + except AttributeError: + pass + + # Check if this is a legacy import + if name in self._legacy_imports: + module_path, actual_name = self._legacy_imports[name] + + # Only warn once per import + # if name not in self._warned_imports: + # warnings.warn( + # f"Importing '{name}' directly from 'pinecone' is deprecated. " + # f"Please import it from '{module_path}' instead. " + # f"This import will be removed in a future version.", + # DeprecationWarning, + # stacklevel=2 + # ) + # self._warned_imports.add(name) + + # Load the module if not already loaded + if module_path not in self._loaded_modules: + try: + self._loaded_modules[module_path] = importlib.import_module(module_path) + except ImportError: + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + # Get the actual object + module = self._loaded_modules[module_path] + if hasattr(module, actual_name): + return getattr(module, actual_name) + + raise AttributeError(f"module 'pinecone' has no attribute '{name}'") + + +def setup_legacy_imports(legacy_imports: Optional[Dict[str, Tuple[str, str]]] = None) -> None: + """ + Set up the legacy import handler. + + Args: + legacy_imports: Optional dictionary of legacy imports to handle. + If None, uses the default LEGACY_IMPORTS dictionary. + """ + if legacy_imports is None: + legacy_imports = LEGACY_IMPORTS + + # Only proceed if the pinecone module is already loaded + if "pinecone" not in sys.modules: + return + + # Create a proxy for the pinecone module + original_module = sys.modules["pinecone"] + proxy = LegacyImportProxy(original_module, legacy_imports) + + # Replace the pinecone module with our proxy + # Use a type cast to satisfy the type checker + sys.modules["pinecone"] = cast(ModuleType, proxy) diff --git a/pinecone/utils/plugin_aware.py b/pinecone/utils/plugin_aware.py index ce1e4b87..4a27351a 100644 --- a/pinecone/utils/plugin_aware.py +++ b/pinecone/utils/plugin_aware.py @@ -1,7 +1,7 @@ +from typing import Any from .setup_openapi_client import build_plugin_setup_client from pinecone.config import Config -from pinecone.openapi_support.configuration import Configuration as OpenApiConfig - +from pinecone.config.openapi_configuration import Configuration as OpenApiConfig from pinecone_plugin_interface import load_and_install as install_plugins import logging @@ -11,17 +11,122 @@ class PluginAware: + """ + Base class for classes that support plugin loading. + + This class provides functionality to lazily load plugins when they are first accessed. + Subclasses must set the following attributes before calling super().__init__(): + - config: Config + - _openapi_config: OpenApiConfig + - _pool_threads: int + + These attributes are considered private and should not be used by end users. The config property + is also considered private, but it was originally named without the underscore and this name + can't be changed without breaking compatibility with plugins in the wild. 
+ """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """ + Initialize the PluginAware class. + + Args: + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Raises: + AttributeError: If required attributes are not set in the subclass. + """ + logger.debug("PluginAware __init__ called for %s", self.__class__.__name__) + + self._plugins_loaded = False + """ @private """ + + # Check for required attributes after super().__init__ has been called + missing_attrs = [] + if not hasattr(self, "config"): + missing_attrs.append("config") + if not hasattr(self, "_openapi_config"): + missing_attrs.append("_openapi_config") + if not hasattr(self, "_pool_threads"): + missing_attrs.append("_pool_threads") + + if missing_attrs: + logger.error( + f"PluginAware class requires the following attributes: {', '.join(missing_attrs)}. " + f"These must be set in the {self.__class__.__name__} class's __init__ method " + f"before calling super().__init__()." + ) + raise AttributeError( + f"PluginAware class requires the following attributes: {', '.join(missing_attrs)}. " + f"These must be set in the {self.__class__.__name__} class's __init__ method " + f"before calling super().__init__()." + ) + + def __getattr__(self, name: str) -> Any: + """ + Called when an attribute is not found through the normal lookup process. + This allows for lazy loading of plugins when they are first accessed. + + Args: + name: The name of the attribute being accessed. + + Returns: + The requested attribute. + + Raises: + AttributeError: If the attribute cannot be found after loading plugins. + """ + logger.debug("__getattr__ called for %s", name) + # Check if this is one of the required attributes that should be set by subclasses + required_attrs = ["config", "_openapi_config", "_pool_threads"] + if name in required_attrs: + raise AttributeError( + f"'{self.__class__.__name__}' object has no attribute '{name}'. " + f"This attribute must be set in the subclass's __init__ method " + f"before calling super().__init__()." + ) + + if not self._plugins_loaded: + logger.debug("Loading plugins for %s", self.__class__.__name__) + # Use object.__getattribute__ to avoid triggering __getattr__ again + try: + config = object.__getattribute__(self, "config") + openapi_config = object.__getattribute__(self, "_openapi_config") + pool_threads = object.__getattribute__(self, "_pool_threads") + self.load_plugins( + config=config, openapi_config=openapi_config, pool_threads=pool_threads + ) + self._plugins_loaded = True + try: + return object.__getattribute__(self, name) + except AttributeError: + pass + except AttributeError: + # If we can't get the required attributes, we can't load plugins + pass + + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'") + def load_plugins( self, config: Config, openapi_config: OpenApiConfig, pool_threads: int ) -> None: - """@private""" + """ + Load plugins for the parent class. + + Args: + config: The Pinecone configuration. + openapi_config: The OpenAPI configuration. + pool_threads: The number of threads in the pool. + """ try: - # I don't expect this to ever throw, but wrapping this in a - # try block just in case to make sure a bad plugin doesn't - # halt client initialization. 
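+            # Defensive: a faulty plugin should never halt client initialization.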
+ # Build the OpenAPI client for plugin setup openapi_client_builder = build_plugin_setup_client( config=config, openapi_config=openapi_config, pool_threads=pool_threads ) + # Install plugins install_plugins(self, openapi_client_builder) + logger.debug("Plugins loaded successfully for %s", self.__class__.__name__) + except ImportError as e: + logger.warning("Failed to import plugin module: %s", e) except Exception as e: - logger.error(f"Error loading plugins: {e}") + logger.error("Error loading plugins: %s", e, exc_info=True) diff --git a/poetry.lock b/poetry.lock index 427dc1e2..2e4de34b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1001,13 +1001,13 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.2" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -1130,6 +1130,21 @@ pygments = ">=2.12.0" [package.extras] dev = ["hypothesis", "mypy", "pdoc-pyo3-sample-library (==1.0.11)", "pygments (>=2.14.0)", "pytest", "pytest-cov", "pytest-timeout", "ruff", "tox", "types-pygments"] +[[package]] +name = "pinecone-plugin-assistant" +version = "1.6.0" +description = "Assistant plugin for Pinecone SDK" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "pinecone_plugin_assistant-1.6.0-py3-none-any.whl", hash = "sha256:d742273d136fba66d020f1af01af2c6bfbc802f7ff9ddf46c590b7ea26932175"}, + {file = "pinecone_plugin_assistant-1.6.0.tar.gz", hash = "sha256:b7c531743f87269ba567dd6084b1464b62636a011564d414bc53147571b2f2c1"}, +] + +[package.dependencies] +packaging = ">=24.2,<25.0" +requests = ">=2.32.3,<3.0.0" + [[package]] name = "pinecone-plugin-interface" version = "0.0.7" @@ -1332,6 +1347,29 @@ files = [ googleapis-common-protos = "*" protobuf = ">=4.21.0" +[[package]] +name = "psutil" +version = "7.0.0" +description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, + {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, + {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, + {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, + {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, + {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, + {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, + {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, +] + +[package.extras] +dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + [[package]] name = "py-cpuinfo" version = "9.0.0" @@ -1497,6 +1535,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.1.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.9" +files = [ + {file = "python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d"}, + {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2023.3.post1" @@ -1668,6 +1720,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tuna" +version = "0.5.11" +description = "Visualize Python performance profiles" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tuna-0.5.11-py3-none-any.whl", hash = "sha256:ab352a6d836014ace585ecd882148f1f7c68be9ea4bf9e9298b7127594dab2ef"}, + {file = "tuna-0.5.11.tar.gz", hash = "sha256:d47f3e39e80af961c8df016ac97d1643c3c60b5eb451299da0ab5fe411d8866c"}, +] + [[package]] name = "types-protobuf" version = "4.24.0.4" @@ -1796,6 +1859,20 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging 
(>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "vprof" +version = "0.38" +description = "Visual profiler for Python" +optional = false +python-versions = "*" +files = [ + {file = "vprof-0.38-py3-none-any.whl", hash = "sha256:91b91d8868176c29e0fe3426c9239d11cd192c7144c7baf26a211e48923a5ee8"}, + {file = "vprof-0.38.tar.gz", hash = "sha256:7f1000912eeb7a450c7c94d3cc96739af45ad0ff01d5abcc0b09a175d40ffadb"}, +] + +[package.dependencies] +psutil = ">=3" + [[package]] name = "yarl" version = "1.17.2" @@ -1899,4 +1976,4 @@ grpc = ["googleapis-common-protos", "grpcio", "grpcio", "grpcio", "lz4", "protob [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "8a10046c5826a9773836e6b3ee50271bb0077d0faf32d709f1e65c4bb1fc53ea" +content-hash = "0145fb2ae02a1cdd6fe06b191a6761dcee4f4c67fe057b48d6b501d7b0b504da" diff --git a/pyproject.toml b/pyproject.toml index 0525d08d..788b2870 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,10 @@ urllib3_mock = "0.3.3" responses = ">=0.8.1" ruff = "^0.9.3" beautifulsoup4 = "^4.13.3" +pinecone-plugin-assistant = "^1.6.0" +vprof = "^0.38" +tuna = "^0.5.11" +python-dotenv = "^1.1.0" [tool.poetry.extras] @@ -152,7 +156,7 @@ docstring-code-line-length = "dynamic" [tool.ruff.lint.per-file-ignores] # F403 Allow star imports # F401 allow imported but unused -"__init__.py" = ["F401", "F403"] +"__init__.py" = ["F401", "F403", "F405"] # E402 Allow module level import not at top of file so # tqdm warnings can be disabled ahead of loading any code diff --git a/tests/__init__.py b/tests/__init__.py index f2dab92a..e69de29b 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +0,0 @@ -import logging - -logging.basicConfig( - format="%(levelname)s [%(asctime)s] %(name)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S" -) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index e69de29b..76acad39 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -0,0 +1,3 @@ +import dotenv + +dotenv.load_dotenv() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 00000000..e42eedae --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,40 @@ +import logging +from pinecone import Pinecone +from datetime import datetime, timedelta + +logger = logging.getLogger(__name__) + + +def pytest_sessionfinish(session, exitstatus): + """ + Hook that runs after all tests have completed. + This is a good place to clean up any resources that were created during the test session. 
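+
+    Note: indexes are matched by the "test-" name prefix, and only indexes more than
+    an hour old are deleted, so indexes still in use by an in-flight run are left alone.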
+ """ + logger.info("Running final cleanup after all tests...") + + try: + # Initialize Pinecone client + pc = Pinecone() + + # Get all indexes + indexes = pc.list_indexes() + + # Find test indexes (those created during this test run) + test_indexes = [idx for idx in indexes.names() if idx.startswith("test-")] + + # Delete test indexes that are older than 1 hour (in case of failed cleanup) + for index_name in test_indexes: + try: + description = pc.describe_index(name=index_name) + created_at = datetime.fromisoformat(description.created_at.replace("Z", "+00:00")) + + if datetime.now(created_at.tzinfo) - created_at > timedelta(hours=1): + logger.info(f"Cleaning up old test index: {index_name}") + pc.delete_index(name=index_name, timeout=-1) + except Exception as e: + logger.warning(f"Failed to clean up index {index_name}: {str(e)}") + + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + + logger.info("Final cleanup completed") diff --git a/tests/integration/control/collections/__init__.py b/tests/integration/control/collections/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/control/collections/conftest.py b/tests/integration/control/collections/conftest.py new file mode 100644 index 00000000..bb592cee --- /dev/null +++ b/tests/integration/control/collections/conftest.py @@ -0,0 +1,136 @@ +import pytest +import uuid +import time +import logging +import dotenv +import os +from datetime import datetime +from pinecone import Pinecone, NotFoundException, PineconeApiException +from ...helpers import get_environment_var + +dotenv.load_dotenv() + +logger = logging.getLogger(__name__) +""" @private """ + +# Generate a unique ID for the entire test run +RUN_ID = str(uuid.uuid4()) + + +@pytest.fixture() +def index_tags(request): + test_name = request.node.name + if test_name is None: + test_name = "" + else: + test_name = test_name.replace(":", "_").replace("[", "_").replace("]", "_") + + tags = { + "test-suite": "pinecone-python-client", + "test-run": RUN_ID, + "test": test_name, + "created-at": datetime.now().strftime("%Y-%m-%d"), + } + + if os.getenv("USER"): + tags["user"] = os.getenv("USER") + return tags + + +@pytest.fixture() +def pc(): + api_key = get_environment_var("PINECONE_API_KEY") + return Pinecone( + api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"} + ) + + +@pytest.fixture() +def pod_environment(): + return get_environment_var("PINECONE_ENVIRONMENT", "us-east1-gcp") + + +def delete_with_retry(pc, index_name, retries=0, sleep_interval=5): + logger.debug( + "Deleting index " + + index_name + + ", retry " + + str(retries) + + ", next sleep interval " + + str(sleep_interval) + ) + try: + pc.db.index.delete(name=index_name, timeout=-1) + except NotFoundException: + pass + except PineconeApiException as e: + if e.error.code == "PRECONDITON_FAILED": + if retries > 5: + raise Exception("Unable to delete index " + index_name) + time.sleep(sleep_interval) + delete_with_retry(pc, index_name, retries + 1, sleep_interval * 2) + else: + logger.error(e.__class__) + logger.error(e) + raise Exception("Unable to delete index " + index_name) + except Exception as e: + logger.error(e.__class__) + logger.error(e) + raise Exception("Unable to delete index " + index_name) + + +def pytest_sessionfinish(session, exitstatus): + """ + Hook that runs after all tests have completed. + This is a good place to clean up any resources that were created during the test session. 
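+
+    Indexes created by this run are identified by their "test-run" tag matching RUN_ID.
+    Deletion protection is disabled first, then the indexes and any remaining
+    collections are deleted.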
+ """ + logger.info("Running final cleanup after all collection tests...") + + try: + pc = Pinecone() + indexes = pc.db.index.list() + test_indexes = [ + idx for idx in indexes if idx.tags is not None and idx.tags.get("test-run") == RUN_ID + ] + + logger.info(f"Indexes to delete: {[idx.name for idx in test_indexes]}") + + for idx in test_indexes: + if idx.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index: {idx.name}") + pc.db.index.configure(name=idx.name, deletion_protection="disabled") + # Wait for index to be updated with status ready + logger.info(f"Waiting for index {idx.name} to be ready...") + timeout = 60 + while True and timeout > 0: + is_ready = pc.db.index.describe(name=idx.name).ready + if is_ready: + break + time.sleep(1) + timeout -= 1 + if timeout <= 0: + logger.warning(f"Index {idx.name} did not become ready in time") + else: + logger.info(f"Deletion protection is already disabled for index: {idx.name}") + + for idx in test_indexes: + try: + logger.info(f"Deleting index: {idx.name}") + pc.db.index.delete(name=idx.name, timeout=-1) + except Exception as e: + logger.warning(f"Failed to delete index {idx.name}: {str(e)}") + + collections = pc.db.collection.list() + logger.info(f"Collections to delete: {[col.name for col in collections]}") + + for col in collections: + try: + logger.info(f"Deleting collection: {col.name}") + pc.db.collection.delete(name=col.name) + except Exception as e: + logger.warning(f"Failed to delete collection {col.name}: {str(e)}") + + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + + logger.info("Final cleanup of collections tests completed") diff --git a/tests/integration/control/collections/helpers.py b/tests/integration/control/collections/helpers.py new file mode 100644 index 00000000..58633a69 --- /dev/null +++ b/tests/integration/control/collections/helpers.py @@ -0,0 +1,57 @@ +import time +import random +import logging + +logger = logging.getLogger(__name__) + + +def random_vector(dimension): + return [random.uniform(0, 1) for _ in range(dimension)] + + +def attempt_cleanup_collection(pc, collection_name): + max_wait = 120 + time_waited = 0 + deleted = False + + while time_waited < max_wait: + try: + pc.db.collection.delete(name=collection_name) + deleted = True + break + except Exception as e: + # Failures here usually happen because the backend thinks there is still some + # operation pending on the resource. + # These orphaned resources will get cleaned up by the cleanup job later. + logger.debug(f"Error while cleaning up collection: {e}") + logger.debug( + f"Waiting for collection {collection_name} to be deleted. Waited {time_waited} seconds..." + ) + time.sleep(10) + time_waited += 10 + if not deleted: + logger.warning(f"Collection {collection_name} was not deleted after {max_wait} seconds") + + +def attempt_cleanup_index(pc, index_name): + max_wait = 120 + time_waited = 0 + deleted = False + + while time_waited < max_wait: + try: + pc.db.index.delete(name=index_name) + deleted = True + break + except Exception as e: + # Failures here usually happen because the backend thinks there is still some + # operation pending on the resource. + # These orphaned resources will get cleaned up by the cleanup job later. + logger.debug(f"Error while cleaning up index: {e}") + logger.debug( + f"Waiting for index {index_name} to be deleted. Waited {time_waited} seconds..." 
+ ) + time.sleep(10) + time_waited += 10 + if not deleted: + logger.warning(f"Index {index_name} was not deleted after {max_wait} seconds") diff --git a/tests/integration/control/collections/test_dense_index.py b/tests/integration/control/collections/test_dense_index.py new file mode 100644 index 00000000..58ad0832 --- /dev/null +++ b/tests/integration/control/collections/test_dense_index.py @@ -0,0 +1,172 @@ +import time +from pinecone import PodSpec +from ...helpers import generate_index_name, generate_collection_name +import logging +from .helpers import attempt_cleanup_collection, attempt_cleanup_index, random_vector + +logger = logging.getLogger(__name__) + + +class TestCollectionsHappyPath: + def test_dense_index_to_collection_to_index(self, pc, pod_environment, index_tags): + # Create a pod index + index_name = generate_index_name("pod-index") + dimension = 10 + metric = "cosine" + pod_index = pc.db.index.create( + name=index_name, + dimension=dimension, + metric=metric, + spec=PodSpec(environment=pod_environment), + tags=index_tags, + ) + + # Insert some vectors into the pod index + idx = pc.Index(host=pod_index.host) + num_vectors = 10 + namespaces = ["", "test-ns1", "test-ns2"] + for namespace in namespaces: + vectors = [(str(i), random_vector(dimension)) for i in range(num_vectors)] + idx.upsert(vectors=vectors, namespace=namespace) + + # Wait for the vectors to be available + all_vectors_available = False + max_wait = 180 + time_waited = 0 + while not all_vectors_available and time_waited < max_wait: + all_vectors_available = True + desc = idx.describe_index_stats() + for namespace in namespaces: + if ( + desc.namespaces.get(namespace, None) is None + or desc.namespaces[namespace]["vector_count"] != num_vectors + ): + logger.debug(f"Waiting for vectors to be available in namespace {namespace}...") + all_vectors_available = False + break + for namespace in namespaces: + for i in range(num_vectors): + try: + idx.fetch(ids=[str(i)], namespace=namespace) + except Exception: + logger.debug( + f"Waiting for vector {i} to be available in namespace {namespace}..." + ) + all_vectors_available = False + break + if not all_vectors_available: + time.sleep(5) + time_waited += 5 + if not all_vectors_available: + raise Exception(f"Vectors were not available after {max_wait} seconds") + + # Create a collection from the pod index + collection_name = generate_collection_name("coll1") + pc.db.collection.create(name=collection_name, source=index_name) + collection_desc = pc.db.collection.describe(name=collection_name) + logger.debug(f"Collection desc: {collection_desc}") + assert collection_desc["name"] == collection_name + assert collection_desc["environment"] == pod_environment + assert collection_desc["status"] is not None + + # Wait for the collection to be ready + time_waited = 0 + max_wait = 120 + collection_ready = collection_desc["status"] + while collection_ready.lower() != "ready" and time_waited < max_wait: + logger.debug( + f"Waiting for collection {collection_name} to be ready. Waited {time_waited} seconds..." 
+            )
+            desc = pc.db.collection.describe(name=collection_name)
+            logger.debug(f"Collection desc: {desc}")
+            collection_ready = desc["status"]
+            if collection_ready.lower() != "ready":
+                time.sleep(10)
+                time_waited += 10
+        if collection_ready.lower() != "ready":
+            raise Exception(f"Collection {collection_name} is not ready after {max_wait} seconds")
+
+        # Verify the collection was created
+        assert collection_name in pc.db.collection.list().names()
+
+        # Verify the collection has the correct info
+        collection_desc = pc.db.collection.describe(name=collection_name)
+        logger.debug(f"Collection desc: {collection_desc}")
+        assert collection_desc["name"] == collection_name
+        assert collection_desc["environment"] == pod_environment
+        assert collection_desc["status"] == "Ready"
+        assert collection_desc["dimension"] == dimension
+        assert collection_desc["vector_count"] == len(namespaces) * num_vectors
+        assert collection_desc["size"] is not None
+        assert collection_desc["size"] > 0
+
+        # Create new index from collection
+        index_name2 = generate_index_name("index-from-collection-" + collection_name)
+        logger.debug(f"Creating index {index_name2} from collection {collection_name}...")
+        new_index = pc.db.index.create(
+            name=index_name2,
+            dimension=dimension,
+            metric=metric,
+            spec=PodSpec(environment=pod_environment, source_collection=collection_name),
+            tags=index_tags,
+        )
+        logger.debug(f"Created index {index_name2} from collection {collection_name}: {new_index}")
+
+        # Wait for the new index to be ready
+        max_wait = 120
+        time_waited = 0
+        index_ready = False
+        while not index_ready and time_waited < max_wait:
+            logger.debug(
+                f"Waiting for index {index_name2} to be ready. Waited {time_waited} seconds..."
+            )
+            desc = pc.db.index.describe(name=index_name2)
+            logger.debug(f"Index {index_name2} status: {desc['status']}")
+            index_ready = desc["status"]["ready"] == True
+            if not index_ready:
+                time.sleep(10)
+                time_waited += 10
+        if not index_ready:
+            raise Exception(f"Index {index_name2} is not ready after {max_wait} seconds")
+
+        new_index_desc = pc.db.index.describe(name=index_name2)
+        logger.debug(f"New index desc: {new_index_desc}")
+        assert new_index_desc["name"] == index_name2
+        assert new_index_desc["status"]["ready"] == True
+
+        new_idx = pc.Index(name=index_name2)
+
+        # Verify stats reflect the vectors present in the collection
+        stats = new_idx.describe_index_stats()
+        logger.debug(f"New index stats: {stats}")
+        assert stats.total_vector_count == len(namespaces) * num_vectors
+
+        # Verify the vectors from the collection can be fetched
+        for namespace in namespaces:
+            results = new_idx.fetch(ids=[v[0] for v in vectors], namespace=namespace)
+            logger.debug(f"Results for namespace {namespace}: {results}")
+            assert len(results.vectors) != 0
+
+        # Verify the vectors from the collection can be queried by id
+        for namespace in namespaces:
+            for i in range(num_vectors):
+                results = new_idx.query(top_k=3, id=str(i), namespace=namespace)
+                logger.debug(
+                    f"Query results for namespace {namespace} and id {i} in index {index_name2}: {results}"
+                )
+                assert len(results.matches) == 3
+
+                # Compare with results from original index
+                original_results = idx.query(top_k=3, id=str(i), namespace=namespace)
+                logger.debug(
+                    f"Original query results for namespace {namespace} and id {i} in index {index_name}: {original_results}"
+                )
+                assert len(original_results.matches) == 3
+                assert original_results.matches[0].id == results.matches[0].id
+                assert original_results.matches[1].id == results.matches[1].id
+                assert original_results.matches[2].id == results.matches[2].id
+
+        # Cleanup
+        attempt_cleanup_collection(pc, collection_name)
+        attempt_cleanup_index(pc, index_name)
+        attempt_cleanup_index(pc, index_name2)
diff --git a/tests/integration/control/index/__init__.py b/tests/integration/control/index/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/integration/control/index/conftest.py b/tests/integration/control/index/conftest.py
new file mode 100644
index 00000000..985c4bb6
--- /dev/null
+++ b/tests/integration/control/index/conftest.py
@@ -0,0 +1,161 @@
+import pytest
+import uuid
+import time
+import logging
+import dotenv
+from pinecone import Pinecone, NotFoundException, PineconeApiException
+from ...helpers import generate_index_name, get_environment_var, index_tags as index_tags_helper
+
+dotenv.load_dotenv()
+
+logger = logging.getLogger(__name__)
+""" @private """
+
+# Generate a unique ID for the entire test run
+RUN_ID = str(uuid.uuid4())
+
+
+@pytest.fixture()
+def index_tags(request):
+    return index_tags_helper(request, RUN_ID)
+
+
+@pytest.fixture()
+def pc():
+    api_key = get_environment_var("PINECONE_API_KEY")
+    return Pinecone(
+        api_key=api_key, additional_headers={"sdk-test-suite": "pinecone-python-client"}
+    )
+
+
+@pytest.fixture()
+def pod_environment():
+    return get_environment_var("PINECONE_ENVIRONMENT", "us-east1-gcp")
+
+
+@pytest.fixture()
+def serverless_cloud():
+    return get_environment_var("SERVERLESS_CLOUD", "aws")
+
+
+@pytest.fixture()
+def serverless_region():
+    return get_environment_var("SERVERLESS_REGION", "us-west-2")
+
+
+@pytest.fixture()
+def create_sl_index_params(index_name, serverless_cloud, serverless_region, index_tags):
+    spec = {"serverless": {"cloud": serverless_cloud, "region": serverless_region}}
+    return dict(name=index_name, dimension=10, metric="cosine", spec=spec, tags=index_tags)
+
+
+@pytest.fixture()
+def index_name(request):
+    test_name = request.node.name
+    return generate_index_name(test_name)
+
+
+@pytest.fixture()
+def ready_sl_index(pc, index_name, create_sl_index_params):
+    create_sl_index_params["timeout"] = None
+    pc.create_index(**create_sl_index_params)
+    yield index_name
+    pc.db.index.delete(name=index_name, timeout=-1)
+
+
+@pytest.fixture()
+def notready_sl_index(pc, index_name, create_sl_index_params):
+    create_sl_index_params["timeout"] = -1
+    pc.create_index(**create_sl_index_params)
+    yield index_name
+
+
+def delete_with_retry(pc, index_name, retries=0, sleep_interval=5):
+    logger.debug(
+        "Deleting index "
+        + index_name
+        + ", retry "
+        + str(retries)
+        + ", next sleep interval "
+        + str(sleep_interval)
+    )
+    try:
+        pc.db.index.delete(name=index_name, timeout=-1)
+    except NotFoundException:
+        pass
+    except PineconeApiException as e:
+        if e.error.code == "PRECONDITON_FAILED":
+            if retries > 5:
+                raise Exception("Unable to delete index " + index_name)
+            time.sleep(sleep_interval)
+            delete_with_retry(pc, index_name, retries + 1, sleep_interval * 2)
+        else:
+            logger.error(e.__class__)
+            logger.error(e)
+            raise Exception("Unable to delete index " + index_name)
+    except Exception as e:
+        logger.error(e.__class__)
+        logger.error(e)
+        raise Exception("Unable to delete index " + index_name)
+
+
+@pytest.fixture(autouse=True)
+def cleanup(pc, index_name):
+    yield
+
+    try:
+        desc = pc.db.index.describe(name=index_name)
+        if desc.deletion_protection == "enabled":
+            logger.info(f"Disabling deletion protection for index: {index_name}")
+            pc.db.index.configure(name=index_name, deletion_protection="disabled")
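+        # Best-effort cleanup: failures are swallowed here, and any leftovers are
+        # removed by the pytest_sessionfinish hook below.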
+ logger.debug("Attempting to delete index with name: " + index_name) + pc.db.index.delete(name=index_name, timeout=-1) + except Exception: + pass + + +def pytest_sessionfinish(session, exitstatus): + """ + Hook that runs after all tests have completed. + This is a good place to clean up any resources that were created during the test session. + """ + logger.info("Running final cleanup after all tests...") + + try: + pc = Pinecone() + indexes = pc.db.index.list() + test_indexes = [ + idx for idx in indexes if idx.tags is not None and idx.tags.get("test-run") == RUN_ID + ] + + logger.info(f"Indexes to delete: {[idx.name for idx in test_indexes]}") + + for idx in test_indexes: + if idx.deletion_protection == "enabled": + logger.info(f"Disabling deletion protection for index: {idx.name}") + pc.db.index.configure(name=idx.name, deletion_protection="disabled") + # Wait for index to be updated with status ready + logger.info(f"Waiting for index {idx.name} to be ready...") + timeout = 60 + while True and timeout > 0: + is_ready = pc.db.index.describe(name=idx.name).ready + if is_ready: + break + time.sleep(1) + timeout -= 1 + if timeout <= 0: + logger.warning(f"Index {idx.name} did not become ready in time") + else: + logger.info(f"Deletion protection is already disabled for index: {idx.name}") + + for idx in test_indexes: + try: + logger.info(f"Deleting index: {idx.name}") + pc.db.index.delete(name=idx.name, timeout=-1) + except Exception as e: + logger.warning(f"Failed to delete index {idx.name}: {str(e)}") + + except Exception as e: + logger.error(f"Error during final cleanup: {str(e)}") + + logger.info("Final cleanup completed") diff --git a/tests/integration/control/index/test_configure.py b/tests/integration/control/index/test_configure.py new file mode 100644 index 00000000..f4c73094 --- /dev/null +++ b/tests/integration/control/index/test_configure.py @@ -0,0 +1,43 @@ +class TestConfigureIndexTags: + def test_add_index_tags(self, pc, ready_sl_index): + starting_tags = pc.db.index.describe(name=ready_sl_index).tags + assert "foo" not in starting_tags + assert "bar" not in starting_tags + + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags["foo"] == "FOO" + assert found_tags["bar"] == "BAR" + + def test_remove_tags_by_setting_empty_value_for_key(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) == "FOO", "foo should not be removed" + assert found_tags.get("bar", None) == "BAR", "bar should not be removed" + + pc.db.index.configure(name=ready_sl_index, tags={"foo": ""}) + found_tags2 = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags2 is not None + assert found_tags2.get("foo", None) is None, "foo should be removed" + assert found_tags2.get("bar", None) == "BAR", "bar should not be removed" + + def test_merge_new_tags_with_existing_tags(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={"baz": "BAZ"}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) == "FOO", "foo should 
not be removed" + assert found_tags.get("bar", None) == "BAR", "bar should not be removed" + assert found_tags.get("baz", None) == "BAZ", "baz should be added" + + def test_remove_multiple_tags(self, pc, ready_sl_index): + pc.db.index.configure(name=ready_sl_index, tags={"foo": "FOO", "bar": "BAR"}) + pc.db.index.configure(name=ready_sl_index, tags={"foo": "", "bar": ""}) + found_tags = pc.db.index.describe(name=ready_sl_index).tags.to_dict() + assert found_tags is not None + assert found_tags.get("foo", None) is None, "foo should be removed" + assert found_tags.get("bar", None) is None, "bar should be removed" diff --git a/tests/integration/control/index/test_create.py b/tests/integration/control/index/test_create.py new file mode 100644 index 00000000..a3aa4406 --- /dev/null +++ b/tests/integration/control/index/test_create.py @@ -0,0 +1,328 @@ +import pytest +import time +from pinecone import ( + Pinecone, + Metric, + VectorType, + DeletionProtection, + ServerlessSpec, + PodSpec, + CloudProvider, + AwsRegion, + PineconeApiValueError, + PineconeApiException, + PineconeApiTypeError, + PodIndexEnvironment, +) + + +class TestCreateServerlessIndexHappyPath: + def test_create_index(self, pc: Pinecone, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" # default value + assert resp.vector_type == "dense" # default value + assert resp.deletion_protection == "disabled" # default value + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.deletion_protection == "disabled" # default value + assert desc.vector_type == "dense" # default value + + def test_create_skip_wait(self, pc, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + timeout=-1, + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + + def test_create_infinite_wait(self, pc, index_name): + resp = pc.db.index.create( + name=index_name, + dimension=10, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + timeout=None, + ) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + def test_create_default_index_with_metric(self, pc, create_sl_index_params, metric): + create_sl_index_params["metric"] = metric + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + if isinstance(metric, str): + assert desc.metric == metric + else: + assert desc.metric == metric.value + assert desc.vector_type == "dense" + + @pytest.mark.parametrize( + "metric_enum,vector_type_enum,dim,tags", + [ + (Metric.COSINE, VectorType.DENSE, 10, None), + (Metric.EUCLIDEAN, VectorType.DENSE, 10, {"env": "prod"}), + (Metric.DOTPRODUCT, VectorType.SPARSE, None, {"env": "dev"}), + ], + ) + def test_create_with_enum_values( + self, pc, index_name, metric_enum, vector_type_enum, dim, tags + ): + args = { + "name": index_name, + "metric": metric_enum, + "vector_type": vector_type_enum, + "deletion_protection": DeletionProtection.DISABLED, + "spec": ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + "tags": tags, + 
} + if dim is not None: + args["dimension"] = dim + + pc.db.index.create(**args) + + desc = pc.db.index.describe(index_name) + assert desc.metric == metric_enum.value + assert desc.vector_type == vector_type_enum.value + assert desc.dimension == dim + assert desc.deletion_protection == DeletionProtection.DISABLED.value + assert desc.name == index_name + assert desc.spec.serverless.cloud == "aws" + assert desc.spec.serverless.region == "us-east-1" + if tags: + assert desc.tags.to_dict() == tags + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + def test_create_dense_index_with_metric(self, pc, create_sl_index_params, metric): + create_sl_index_params["metric"] = metric + create_sl_index_params["vector_type"] = VectorType.DENSE + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.metric == metric + assert desc.vector_type == "dense" + + def test_create_with_optional_tags(self, pc, create_sl_index_params): + tags = {"foo": "FOO", "bar": "BAR"} + create_sl_index_params["tags"] = tags + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.tags.to_dict() == tags + + +class TestCreatePodIndexHappyPath: + def test_create_index_minimal_config( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec(environment=pod_environment), + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.spec.pod.environment == pod_environment + assert desc.tags.to_dict() == index_tags + assert desc.status.ready == True + assert desc.status.state == "Ready" + assert desc.vector_type == "dense" + + def test_create_index_with_spec_options( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec( + environment=pod_environment, + pod_type="p1.x2", + replicas=2, + metadata_config={"indexed": ["foo", "bar"]}, + ), + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.spec.pod.environment == pod_environment + assert desc.spec.pod.pod_type == "p1.x2" + assert desc.spec.pod.replicas == 2 + assert desc.spec.pod.metadata_config.indexed == ["foo", "bar"] + + def test_create_index_with_deletion_protection( + self, pc: Pinecone, index_name, pod_environment, index_tags + ): + pc.db.index.create( + name=index_name, + dimension=10, + metric="cosine", + spec=PodSpec(environment=pod_environment), + tags=index_tags, + deletion_protection=DeletionProtection.ENABLED, + ) + + try: + pc.db.index.delete(name=index_name) + except PineconeApiException as e: + assert "Deletion protection is enabled for this index" in str(e) + + pc.db.index.configure(name=index_name, deletion_protection=DeletionProtection.DISABLED) + max_wait_time = 60 + while pc.db.index.describe(name=index_name).status.ready == False: + time.sleep(1) + max_wait_time -= 1 + if max_wait_time <= 0: + raise Exception("Index did not become ready in time") + + pc.db.index.delete(name=index_name) + assert pc.db.index.has(name=index_name) == False + + +class TestCreatePodIndexApiErrorCases: + def test_pod_index_does_not_support_sparse_vectors(self, pc, index_name, 
index_tags): + with pytest.raises(PineconeApiException) as e: + pc.db.index.create( + name=index_name, + metric="dotproduct", + spec=PodSpec(environment=PodIndexEnvironment.US_EAST1_AWS), + vector_type="sparse", + tags=index_tags, + ) + assert "Sparse vector type is not supported for pod indexes" in str(e.value) + + +class TestCreateServerlessIndexApiErrorCases: + def test_create_index_with_invalid_name(self, pc, create_sl_index_params): + create_sl_index_params["name"] = "Invalid-name" + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_invalid_metric(self, pc, create_sl_index_params): + create_sl_index_params["metric"] = "invalid" + with pytest.raises(PineconeApiValueError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_with_invalid_neg_dimension(self, pc, create_sl_index_params): + create_sl_index_params["dimension"] = -1 + with pytest.raises(PineconeApiValueError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_that_already_exists(self, pc, create_sl_index_params): + pc.db.index.create(**create_sl_index_params) + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + +class TestCreateServerlessIndexWithTimeout: + def test_create_index_default_timeout(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = None + pc.db.index.create(**create_sl_index_params) + # Waits infinitely for index to be ready + desc = pc.db.index.describe(create_sl_index_params["name"]) + assert desc.status.ready == True + + def test_create_index_when_timeout_set(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = ( + 1000 # effectively infinite, but different code path from None + ) + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(name=create_sl_index_params["name"]) + assert desc.status.ready == True + + def test_create_index_with_negative_timeout(self, pc, create_sl_index_params): + create_sl_index_params["timeout"] = -1 + pc.db.index.create(**create_sl_index_params) + desc = pc.db.index.describe(create_sl_index_params["name"]) + # Returns immediately without waiting for index to be ready + assert desc.status.ready in [False, True] + + +class TestCreateIndexTypeErrorCases: + def test_create_index_with_invalid_str_dimension(self, pc, create_sl_index_params): + create_sl_index_params["dimension"] = "10" + with pytest.raises(PineconeApiTypeError): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_with_missing_dimension(self, pc, create_sl_index_params): + del create_sl_index_params["dimension"] + with pytest.raises(PineconeApiException): + pc.db.index.create(**create_sl_index_params) + + def test_create_index_w_incompatible_options(self, pc, create_sl_index_params): + create_sl_index_params["pod_type"] = "p1.x2" + create_sl_index_params["environment"] = "us-east1-gcp" + create_sl_index_params["replicas"] = 2 + with pytest.raises(TypeError): + pc.db.index.create(**create_sl_index_params) + + @pytest.mark.parametrize("required_option", ["name", "spec", "dimension"]) + def test_create_with_missing_required_options( + self, pc, create_sl_index_params, required_option + ): + del create_sl_index_params[required_option] + with pytest.raises(Exception) as e: + pc.db.index.create(**create_sl_index_params) + assert required_option.lower() in str(e.value).lower() + + +class TestSparseIndex: + def test_create_sparse_index_minimal_config(self, pc: Pinecone, index_name, index_tags): + 
pc.db.index.create( + name=index_name, + metric="dotproduct", + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + vector_type=VectorType.SPARSE, + tags=index_tags, + ) + + desc = pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.metric == "dotproduct" + assert desc.vector_type == "sparse" + + +class TestSparseIndexErrorCases: + def test_sending_dimension_with_sparse_index(self, pc, index_tags): + with pytest.raises(ValueError) as e: + pc.db.index.create( + name="test-index", + dimension=10, + metric="dotproduct", + vector_type=VectorType.SPARSE, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + tags=index_tags, + ) + assert "dimension should not be specified for sparse indexes" in str(e.value) + + @pytest.mark.parametrize("bad_metric", ["cosine", "euclidean"]) + def test_sending_metric_other_than_dotproduct_with_sparse_index( + self, pc, index_tags, bad_metric + ): + with pytest.raises(PineconeApiException) as e: + pc.db.index.create( + name="test-index", + metric=bad_metric, + vector_type=VectorType.SPARSE, + spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + tags=index_tags, + ) + assert "Sparse vector indexes must use the metric dotproduct" in str(e.value) diff --git a/tests/integration/control/index/test_delete.py b/tests/integration/control/index/test_delete.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/control/index/test_describe.py b/tests/integration/control/index/test_describe.py new file mode 100644 index 00000000..df7f5896 --- /dev/null +++ b/tests/integration/control/index/test_describe.py @@ -0,0 +1,46 @@ +from pinecone import IndexModel + + +class TestDescribeIndex: + def test_describe_index_when_ready(self, pc, ready_sl_index, create_sl_index_params): + description = pc.db.index.describe(ready_sl_index) + + assert isinstance(description, IndexModel) + assert description.name == ready_sl_index + assert description.dimension == create_sl_index_params["dimension"] + assert description.metric == create_sl_index_params["metric"] + assert ( + description.spec.serverless["cloud"] + == create_sl_index_params["spec"]["serverless"]["cloud"] + ) + assert ( + description.spec.serverless["region"] + == create_sl_index_params["spec"]["serverless"]["region"] + ) + + assert isinstance(description.host, str) + assert description.host != "" + assert ready_sl_index in description.host + + assert description.status.state == "Ready" + assert description.status.ready == True + + def test_describe_index_when_not_ready(self, pc, notready_sl_index, create_sl_index_params): + description = pc.db.index.describe(notready_sl_index) + + assert isinstance(description, IndexModel) + assert description.name == notready_sl_index + assert description.dimension == create_sl_index_params["dimension"] + assert description.metric == create_sl_index_params["metric"] + assert ( + description.spec.serverless["cloud"] + == create_sl_index_params["spec"]["serverless"]["cloud"] + ) + assert ( + description.spec.serverless["region"] + == create_sl_index_params["spec"]["serverless"]["region"] + ) + + assert isinstance(description.host, str) + assert description.host != "" + assert notready_sl_index in description.host diff --git a/tests/integration/control/index/test_has.py b/tests/integration/control/index/test_has.py new file mode 100644 index 00000000..1a356a99 --- /dev/null +++ b/tests/integration/control/index/test_has.py @@ -0,0 +1,18 @@ +from tests.integration.helpers 
import random_string


class TestHasIndex:
    def test_index_exists_success(self, pc, create_sl_index_params):
        name = create_sl_index_params["name"]
        pc.db.index.create(**create_sl_index_params)
        has_index = pc.db.index.has(name)
        assert has_index == True

    def test_index_does_not_exist(self, pc):
        name = random_string(8)
        has_index = pc.db.index.has(name)
        assert has_index == False

    def test_has_index_with_null_index_name(self, pc):
        has_index = pc.db.index.has("")
        assert has_index == False
diff --git a/tests/integration/control/index/test_list.py b/tests/integration/control/index/test_list.py
new file mode 100644
index 00000000..4e217ea5
--- /dev/null
+++ b/tests/integration/control/index/test_list.py
@@ -0,0 +1,27 @@
+from pinecone import IndexModel
+
+
+class TestListIndexes:
+    def test_list_indexes_includes_ready_indexes(self, pc, ready_sl_index, create_sl_index_params):
+        list_response = pc.db.index.list()
+        assert len(list_response.indexes) != 0
+        assert isinstance(list_response.indexes[0], IndexModel)
+
+        created_index = [index for index in list_response.indexes if index.name == ready_sl_index][
+            0
+        ]
+        assert created_index.name == ready_sl_index
+        assert created_index.dimension == create_sl_index_params["dimension"]
+        assert created_index.metric == create_sl_index_params["metric"]
+        assert ready_sl_index in created_index.host
+
+    def test_list_indexes_includes_not_ready_indexes(self, pc, notready_sl_index):
+        list_response = pc.db.index.list()
+        assert len(list_response.indexes) != 0
+        assert isinstance(list_response.indexes[0], IndexModel)
+
+        created_index = [
+            index for index in list_response.indexes if index.name == notready_sl_index
+        ][0]
+        assert created_index.name == notready_sl_index
+        assert notready_sl_index in created_index.host
diff --git a/tests/integration/control_asyncio/conftest.py b/tests/integration/control_asyncio/conftest.py
index 99cf3c91..33c2b529 100644
--- a/tests/integration/control_asyncio/conftest.py
+++ b/tests/integration/control_asyncio/conftest.py
@@ -201,6 +201,6 @@ def cleanup(client, index_name):
 
     try:
         logger.debug("Attempting to delete index with name: " + index_name)
-        client.delete_index(index_name, -1)
+        client.delete_index(name=index_name, timeout=-1)
     except Exception:
         pass
diff --git a/tests/integration/control_asyncio/index/__init__.py b/tests/integration/control_asyncio/index/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/integration/control_asyncio/index/conftest.py b/tests/integration/control_asyncio/index/conftest.py
new file mode 100644
index 00000000..ea17bc58
--- /dev/null
+++ b/tests/integration/control_asyncio/index/conftest.py
@@ -0,0 +1,206 @@
+import pytest
+import time
+import random
+import asyncio
+from ...helpers import get_environment_var, generate_index_name
+import logging
+from typing import Callable, Optional, Awaitable, Union
+
+from pinecone import (
+    CloudProvider,
+    AwsRegion,
+    ServerlessSpec,
+    PineconeApiException,
+    NotFoundException,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def build_client():
+    from pinecone import PineconeAsyncio
+
+    return PineconeAsyncio()
+
+
+@pytest.fixture(scope="session")
+def client():
+    # This returns the sync client. 
Not for use in tests + # but can be used to help with cleanup after test runs + from pinecone import Pinecone + + return Pinecone() + + +@pytest.fixture(scope="session") +def build_pc(): + return build_client + + +async def poll_for_freshness(asyncio_idx, target_namespace, target_vector_count): + max_wait_time = 60 * 3 # 3 minutes + time_waited = 0 + wait_per_iteration = 5 + + while True: + stats = await asyncio_idx.describe_index_stats() + logger.debug( + "Polling for freshness on index %s. Current vector count: %s. Waiting for: %s", + asyncio_idx, + stats.total_vector_count, + target_vector_count, + ) + if target_namespace == "": + if stats.total_vector_count >= target_vector_count: + break + else: + if ( + target_namespace in stats.namespaces + and stats.namespaces[target_namespace].vector_count >= target_vector_count + ): + break + time_waited += wait_per_iteration + if time_waited >= max_wait_time: + raise TimeoutError( + "Timeout waiting for index to have expected vector count of {}".format( + target_vector_count + ) + ) + await asyncio.sleep(wait_per_iteration) + + return stats + + +async def wait_until( + condition: Union[Callable[[], bool], Callable[[], Awaitable[bool]]], + timeout: Optional[float] = 10.0, + interval: float = 0.1, +) -> None: + """ + Waits asynchronously until the given (async or sync) condition returns True or times out. + + Args: + condition: A callable that returns a boolean or an awaitable boolean, indicating if the wait is over. + timeout: Maximum time in seconds to wait for the condition to become True. If None, wait indefinitely. + interval: Time in seconds between checks of the condition. + + Raises: + asyncio.TimeoutError: If the condition is not met within the timeout period. + """ + start_time = asyncio.get_event_loop().time() + + while True: + result = await condition() if asyncio.iscoroutinefunction(condition) else condition() + if result: + return + + if timeout is not None and (asyncio.get_event_loop().time() - start_time) > timeout: + raise asyncio.TimeoutError("Condition not met within the timeout period.") + + remaining_time = ( + (start_time + timeout) - asyncio.get_event_loop().time() + if timeout is not None + else None + ) + logger.debug( + "Condition not met yet. Waiting for %.2f seconds. 
Timeout in %.2f seconds.",
+            interval,
+            remaining_time,
+        )
+        await asyncio.sleep(interval)
+
+
+@pytest.fixture()
+def serverless_cloud():
+    return get_environment_var("SERVERLESS_CLOUD", "aws")
+
+
+@pytest.fixture()
+def serverless_region():
+    return get_environment_var("SERVERLESS_REGION", "us-west-2")
+
+
+@pytest.fixture()
+def spec1(serverless_cloud, serverless_region):
+    return {"serverless": {"cloud": serverless_cloud, "region": serverless_region}}
+
+
+@pytest.fixture()
+def spec2():
+    return ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1)
+
+
+@pytest.fixture()
+def spec3():
+    return {"serverless": {"cloud": CloudProvider.AWS, "region": AwsRegion.US_EAST_1}}
+
+
+@pytest.fixture()
+def create_sl_index_params(index_name, serverless_cloud, serverless_region):
+    spec = {"serverless": {"cloud": serverless_cloud, "region": serverless_region}}
+    return dict(name=index_name, dimension=10, metric="cosine", spec=spec)
+
+
+@pytest.fixture()
+def random_vector():
+    return [random.uniform(0, 1) for _ in range(10)]
+
+
+@pytest.fixture()
+def index_name(request):
+    test_name = request.node.name
+    return generate_index_name(test_name)
+
+
+@pytest.fixture()
+def ready_sl_index(client, index_name, create_sl_index_params):
+    create_sl_index_params["timeout"] = None
+    client.create_index(**create_sl_index_params)
+    yield index_name
+    client.delete_index(name=index_name, timeout=-1)
+
+
+@pytest.fixture()
+def notready_sl_index(client, index_name, create_sl_index_params):
+    client.create_index(**create_sl_index_params, timeout=-1)
+    yield index_name
+
+
+def delete_with_retry(client, index_name, retries=0, sleep_interval=5):
+    logger.debug(
+        "Deleting index "
+        + index_name
+        + ", retry "
+        + str(retries)
+        + ", next sleep interval "
+        + str(sleep_interval)
+    )
+    try:
+        client.delete_index(name=index_name, timeout=-1)
+    except NotFoundException:
+        pass
+    except PineconeApiException as e:
+        if e.error.code == "PRECONDITON_FAILED":
+            if retries > 5:
+                raise Exception("Unable to delete index " + index_name)
+            time.sleep(sleep_interval)
+            delete_with_retry(client, index_name, retries + 1, sleep_interval * 2)
+        else:
+            logger.error(e.__class__)
+            logger.error(e)
+            raise Exception("Unable to delete index " + index_name)
+    except Exception as e:
+        logger.error(e.__class__)
+        logger.error(e)
+        raise Exception("Unable to delete index " + index_name)
+
+
+@pytest.fixture(autouse=True)
+async def cleanup(client, index_name):
+    yield
+
+    try:
+        logger.debug("Attempting to delete index with name: " + index_name)
+        client.db.index.delete(name=index_name, timeout=-1)
+    except Exception:
+        pass
diff --git a/tests/integration/control_asyncio/index/test_create.py b/tests/integration/control_asyncio/index/test_create.py
new file mode 100644
index 00000000..b85cfebc
--- /dev/null
+++ b/tests/integration/control_asyncio/index/test_create.py
@@ -0,0 +1,162 @@
+import pytest
+from pinecone import (
+    PineconeAsyncio,
+    Metric,
+    VectorType,
+    DeletionProtection,
+    ServerlessSpec,
+    CloudProvider,
+    AwsRegion,
+)
+
+
+@pytest.mark.asyncio
+class TestAsyncioCreateIndex:
+    @pytest.mark.parametrize("spec_fixture", ("spec1", "spec2", "spec3"))
+    async def test_create_index(self, index_name, request, spec_fixture):
+        pc = PineconeAsyncio()
+        spec = request.getfixturevalue(spec_fixture)
+
+        resp = await pc.db.index.create(name=index_name, dimension=10, spec=spec)
+
+        assert resp.name == index_name
+        assert resp.dimension == 10
+        assert resp.metric == "cosine"  # default value
+        assert resp.vector_type == "dense"  # default value
+        assert resp.deletion_protection == "disabled"  # default value
+
+        desc 
= await pc.db.index.describe(name=index_name) + assert desc.name == index_name + assert desc.dimension == 10 + assert desc.metric == "cosine" + assert desc.deletion_protection == "disabled" # default value + assert desc.vector_type == "dense" # default value + await pc.close() + + async def test_create_skip_wait(self, index_name, spec1): + pc = PineconeAsyncio() + resp = await pc.db.index.create(name=index_name, dimension=10, spec=spec1, timeout=-1) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + await pc.close() + + async def test_create_infinite_wait(self, index_name, spec1): + async with PineconeAsyncio() as pc: + resp = await pc.db.index.create(name=index_name, dimension=10, spec=spec1, timeout=None) + assert resp.name == index_name + assert resp.dimension == 10 + assert resp.metric == "cosine" + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + async def test_create_default_index_with_metric(self, index_name, metric, spec1): + pc = PineconeAsyncio() + + await pc.db.index.create(name=index_name, dimension=10, spec=spec1, metric=metric) + desc = await pc.db.index.describe(index_name) + if isinstance(metric, str): + assert desc.metric == metric + else: + assert desc.metric == metric.value + assert desc.vector_type == "dense" + await pc.close() + + @pytest.mark.parametrize( + "metric_enum,vector_type_enum,dim,tags", + [ + (Metric.COSINE, VectorType.DENSE, 10, None), + (Metric.EUCLIDEAN, VectorType.DENSE, 10, {"env": "prod"}), + (Metric.DOTPRODUCT, VectorType.SPARSE, None, {"env": "dev"}), + ], + ) + async def test_create_with_enum_values_and_tags( + self, index_name, metric_enum, vector_type_enum, dim, tags + ): + pc = PineconeAsyncio() + args = { + "name": index_name, + "metric": metric_enum, + "vector_type": vector_type_enum, + "deletion_protection": DeletionProtection.DISABLED, + "spec": ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1), + "tags": tags, + } + if dim is not None: + args["dimension"] = dim + + await pc.db.index.create(**args) + + desc = await pc.db.index.describe(index_name) + assert desc.metric == metric_enum.value + assert desc.vector_type == vector_type_enum.value + assert desc.dimension == dim + assert desc.deletion_protection == DeletionProtection.DISABLED.value + assert desc.name == index_name + assert desc.spec.serverless.cloud == "aws" + assert desc.spec.serverless.region == "us-east-1" + if tags: + assert desc.tags.to_dict() == tags + await pc.close() + + @pytest.mark.parametrize("metric", ["cosine", "euclidean", "dotproduct"]) + async def test_create_dense_index_with_metric(self, index_name, spec1, metric): + pc = PineconeAsyncio() + + await pc.create_index( + name=index_name, dimension=10, spec=spec1, metric=metric, vector_type=VectorType.DENSE + ) + + desc = await pc.db.index.describe(index_name) + assert desc.metric == metric + assert desc.vector_type == "dense" + await pc.close() + + async def test_create_with_optional_tags(self, index_name, spec1): + pc = PineconeAsyncio() + tags = {"foo": "FOO", "bar": "BAR"} + + await pc.create_index(name=index_name, dimension=10, spec=spec1, tags=tags) + + desc = await pc.db.index.describe(index_name) + assert desc.tags.to_dict() == tags + await pc.close() + + async def test_create_sparse_index(self, index_name, spec1): + pc = PineconeAsyncio() + + await pc.create_index( + name=index_name, spec=spec1, metric=Metric.DOTPRODUCT, vector_type=VectorType.SPARSE + ) + + desc = await pc.db.index.describe(index_name) + assert 
diff --git a/tests/integration/control_asyncio/test_create_index.py b/tests/integration/control_asyncio/test_create_index.py
index 334ba86a..683c53a8 100644
--- a/tests/integration/control_asyncio/test_create_index.py
+++ b/tests/integration/control_asyncio/test_create_index.py
@@ -122,18 +122,16 @@ async def test_create_with_optional_tags(self, index_name, spec1):
         await pc.close()
 
     async def test_create_sparse_index(self, index_name, spec1):
-        pc = PineconeAsyncio()
-
-        await pc.create_index(
-            name=index_name, spec=spec1, metric=Metric.DOTPRODUCT, vector_type=VectorType.SPARSE
-        )
-
-        desc = await pc.describe_index(index_name)
-        assert desc.vector_type == "sparse"
-        assert desc.dimension is None
-        assert desc.vector_type == "sparse"
-        assert desc.metric == "dotproduct"
-        await pc.close()
+        async with PineconeAsyncio() as pc:
+            await pc.create_index(
+                name=index_name, spec=spec1, metric=Metric.DOTPRODUCT, vector_type=VectorType.SPARSE
+            )
+
+            desc = await pc.describe_index(index_name)
+            assert desc.vector_type == "sparse"
+            assert desc.dimension is None
+            assert desc.vector_type == "sparse"
+            assert desc.metric == "dotproduct"
 
     async def test_create_with_deletion_protection(self, index_name, spec1):
         pc = PineconeAsyncio()
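The hunk above replaces a manually closed client with an async context manager. The practical difference shows up on failure: if an assertion raises before a trailing await pc.close(), the close is skipped and the underlying HTTP session leaks, whereas async with closes on the way out of the block. A minimal sketch of the two shapes, assuming only the context-manager support the diff itself demonstrates:

from pinecone import PineconeAsyncio

async def manually_closed():
    pc = PineconeAsyncio()
    try:
        ...  # any exception raised here would skip a bare `await pc.close()`
    finally:
        await pc.close()  # try/finally is the manual equivalent of async with

async def context_managed():
    async with PineconeAsyncio() as pc:
        ...  # close is awaited on exit, even when an assertion fails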
diff --git a/tests/integration/data/conftest.py b/tests/integration/data/conftest.py
index c7498cb8..9fa7b997 100644
--- a/tests/integration/data/conftest.py
+++ b/tests/integration/data/conftest.py
@@ -1,12 +1,20 @@
 import pytest
 import os
 import json
-from ..helpers import get_environment_var, generate_index_name
+import uuid
+from ..helpers import get_environment_var, generate_index_name, index_tags as index_tags_helper
 import logging
 from pinecone import EmbedModel, CloudProvider, AwsRegion, IndexEmbed
 
 logger = logging.getLogger(__name__)
 
+RUN_ID = str(uuid.uuid4())
+
+
+@pytest.fixture(scope="session")
+def index_tags(request):
+    return index_tags_helper(request, RUN_ID)
+
 
 def api_key():
     return get_environment_var("PINECONE_API_KEY")
@@ -90,7 +98,7 @@ def model_idx(client, model_index_name, model_index_host):
 
 
 @pytest.fixture(scope="session")
-def model_index_host(model_index_name):
+def model_index_host(model_index_name, index_tags):
     pc = build_client()
 
     if model_index_name not in pc.list_indexes().names():
@@ -104,6 +112,7 @@ def model_index_host(model_index_name):
                 field_map={"text": "my_text_field"},
                 metric="cosine",
             ),
+            tags=index_tags,
         )
     else:
         logger.info(f"Index {model_index_name} already exists")
@@ -116,12 +125,12 @@ def model_index_host(model_index_name):
 
 
 @pytest.fixture(scope="session")
-def index_host(index_name, metric, spec):
+def index_host(index_name, metric, spec, index_tags):
     pc = build_client()
 
     if index_name not in pc.list_indexes().names():
         logger.info(f"Creating index {index_name}")
-        pc.create_index(name=index_name, dimension=2, metric=metric, spec=spec)
+        pc.create_index(name=index_name, dimension=2, metric=metric, spec=spec, tags=index_tags)
     else:
         logger.info(f"Index {index_name} already exists")
 
@@ -133,13 +142,17 @@ def index_host(index_name, metric, spec):
 
 
 @pytest.fixture(scope="session")
-def sparse_index_host(sparse_index_name, spec):
+def sparse_index_host(sparse_index_name, spec, index_tags):
     pc = build_client()
 
     if sparse_index_name not in pc.list_indexes().names():
         logger.info(f"Creating index {sparse_index_name}")
         pc.create_index(
-            name=sparse_index_name, metric="dotproduct", spec=spec, vector_type="sparse"
+            name=sparse_index_name,
+            metric="dotproduct",
+            spec=spec,
+            vector_type="sparse",
+            tags=index_tags,
         )
     else:
         logger.info(f"Index {sparse_index_name} already exists")
diff --git a/tests/integration/data/seed.py b/tests/integration/data/seed.py
index 827aea9a..2019761e 100644
--- a/tests/integration/data/seed.py
+++ b/tests/integration/data/seed.py
@@ -1,6 +1,5 @@
 from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values
 from pinecone import Vector
-
 import itertools
 
 
@@ -133,3 +132,6 @@ def setup_weird_ids_data(idx, target_namespace, wait):
     for i in range(0, len(weird_ids), batch_size):
         chunk = weird_ids[i : i + batch_size]
         idx.upsert(vectors=[(x, embedding_values(2)) for x in chunk], namespace=target_namespace)
+
+    if wait:
+        poll_fetch_for_ids_in_namespace(idx, ids=weird_ids, namespace=target_namespace)
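The seed.py change above makes setup_weird_ids_data block until the upserted ids are actually fetchable, instead of returning immediately after the write. The general shape of such a poll loop, sketched with an assumed timeout and interval (the suite's real poll_fetch_for_ids_in_namespace helper may differ in signature and behavior):

import time

def poll_until_fetchable(idx, ids, namespace, timeout=120, interval=5):
    # Fetch repeatedly until every id is visible in the namespace, or fail
    # after `timeout` seconds. Upserts are eventually consistent, so a read
    # issued immediately after a write may not see the new vectors yet.
    deadline = time.time() + timeout
    while time.time() < deadline:
        fetched = idx.fetch(ids=list(ids), namespace=namespace)
        if all(i in fetched.vectors for i in ids):
            return
        time.sleep(interval)
    raise TimeoutError(f"ids not visible in {namespace!r} within {timeout}s")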
diff --git a/tests/integration/data/test_query_namespaces_sparse.py b/tests/integration/data/test_query_namespaces_sparse.py
index 607798ea..958368b5 100644
--- a/tests/integration/data/test_query_namespaces_sparse.py
+++ b/tests/integration/data/test_query_namespaces_sparse.py
@@ -1,6 +1,6 @@
 import pytest
 from ..helpers import random_string, poll_stats_for_namespace
-from pinecone.data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError
+from pinecone.db_data.query_results_aggregator import QueryResultsAggregatorInvalidTopKError
 
 from pinecone import Vector, SparseValues
 
diff --git a/tests/integration/data/test_search_and_upsert_records.py b/tests/integration/data/test_search_and_upsert_records.py
index e83a5cd8..0a269a49 100644
--- a/tests/integration/data/test_search_and_upsert_records.py
+++ b/tests/integration/data/test_search_and_upsert_records.py
@@ -6,7 +6,7 @@
 import os
 
 from pinecone import RerankModel, PineconeApiException
-from pinecone.data import _Index
+from pinecone.db_data import _Index
 
 logger = logging.getLogger(__name__)
 
diff --git a/tests/integration/data/test_upsert_from_dataframe.py b/tests/integration/data/test_upsert_from_dataframe.py
index 49bc9abc..4534bc4f 100644
--- a/tests/integration/data/test_upsert_from_dataframe.py
+++ b/tests/integration/data/test_upsert_from_dataframe.py
@@ -1,5 +1,5 @@
 import pandas as pd
-from pinecone.data import _Index
+from pinecone.db_data import _Index
 
 from ..helpers import embedding_values, random_string
 
diff --git a/tests/integration/data_asyncio/conftest.py b/tests/integration/data_asyncio/conftest.py
index 6401e073..9769a5e9 100644
--- a/tests/integration/data_asyncio/conftest.py
+++ b/tests/integration/data_asyncio/conftest.py
@@ -2,7 +2,7 @@
 import json
 import asyncio
 from ..helpers import get_environment_var, generate_index_name
-from pinecone.data import _IndexAsyncio
+from pinecone.db_data import _IndexAsyncio
 import logging
 
 from typing import Callable, Optional, Awaitable, Union
diff --git a/tests/integration/helpers/__init__.py b/tests/integration/helpers/__init__.py
index f233d089..3b680b3d 100644
--- a/tests/integration/helpers/__init__.py
+++ b/tests/integration/helpers/__init__.py
@@ -8,4 +8,5 @@
     poll_fetch_for_ids_in_namespace,
     embedding_values,
     jsonprint,
+    index_tags,
 )
diff --git a/tests/integration/helpers/helpers.py b/tests/integration/helpers/helpers.py
index 480585e5..d9990df4 100644
--- a/tests/integration/helpers/helpers.py
+++ b/tests/integration/helpers/helpers.py
@@ -7,7 +7,7 @@
 from typing import Any
 from datetime import datetime
 import json
-from pinecone.data import _Index
+from pinecone.db_data import _Index
 from typing import List
 
 logger = logging.getLogger(__name__)
@@ -123,3 +123,22 @@ def fake_api_key():
 
 def jsonprint(obj):
     print(json.dumps(obj.to_dict(), indent=2))
+
+
+def index_tags(request, run_id):
+    test_name = request.node.name
+    if test_name is None:
+        test_name = ""
+    else:
+        test_name = test_name.replace(":", "_").replace("[", "_").replace("]", "_")
+
+    tags = {
+        "test-suite": "pinecone-python-client",
+        "test-run": run_id,
+        "test": test_name,
+        "created-at": datetime.now().strftime("%Y-%m-%d"),
+    }
+
+    if os.getenv("USER"):
+        tags["user"] = os.getenv("USER")
+    return tags
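The new index_tags helper only consults request.node.name, so its output is easy to preview outside a pytest run. An illustrative call with a stubbed request object (this assumes the tests package is importable from the repo root; otherwise copy the function body):

import uuid
from types import SimpleNamespace

from tests.integration.helpers import index_tags

# Stand-in for pytest's `request` fixture; only `.node.name` is read.
fake_request = SimpleNamespace(node=SimpleNamespace(name="test_upsert[dense]"))
tags = index_tags(fake_request, str(uuid.uuid4()))
# -> {'test-suite': 'pinecone-python-client', 'test-run': '<uuid>',
#     'test': 'test_upsert_dense_', 'created-at': '<YYYY-MM-DD>', ...}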
diff --git a/tests/perf/test_query_results_aggregator.py b/tests/perf/test_query_results_aggregator.py
index 29ac4c35..9f33c149 100644
--- a/tests/perf/test_query_results_aggregator.py
+++ b/tests/perf/test_query_results_aggregator.py
@@ -1,5 +1,5 @@
 import random
-from pinecone.data.query_results_aggregator import QueryResultsAggregator
+from pinecone.db_data.query_results_aggregator import QueryResultsAggregator
 
 
 def fake_results(i):
diff --git a/tests/unit/data/test_bulk_import.py b/tests/unit/data/test_bulk_import.py
index b1bcd4cc..c7ad5a14 100644
--- a/tests/unit/data/test_bulk_import.py
+++ b/tests/unit/data/test_bulk_import.py
@@ -6,7 +6,7 @@
     ImportErrorMode as ImportErrorModeGeneratedClass,
 )
 
-from pinecone.data.features.bulk_import import ImportFeatureMixin, ImportErrorMode
+from pinecone.db_data.features.bulk_import import ImportFeatureMixin, ImportErrorMode
 
 
 def build_client_w_faked_response(mocker, body: str, status: int = 200):
diff --git a/tests/unit/data/test_request_factory.py b/tests/unit/data/test_request_factory.py
index 087436c9..ea04acdf 100644
--- a/tests/unit/data/test_request_factory.py
+++ b/tests/unit/data/test_request_factory.py
@@ -1,5 +1,5 @@
 import pytest
-from pinecone.data.request_factory import (
+from pinecone.db_data.request_factory import (
     IndexRequestFactory,
     SearchQuery,
     SearchQueryVector,
diff --git a/tests/unit/data/test_vector_factory.py b/tests/unit/data/test_vector_factory.py
index 52fd1eac..adeeaf9c 100644
--- a/tests/unit/data/test_vector_factory.py
+++ b/tests/unit/data/test_vector_factory.py
@@ -2,7 +2,7 @@
 import pandas as pd
 import pytest
 
-from pinecone.data.vector_factory import VectorFactory
+from pinecone.db_data.vector_factory import VectorFactory
 from pinecone import Vector, SparseValues, ListConversionException
 from pinecone.core.openapi.db_data.models import (
     Vector as OpenApiVector,
diff --git a/tests/unit/models/test_index_model.py b/tests/unit/models/test_index_model.py
index 7320ce8d..7aeb88d1 100644
--- a/tests/unit/models/test_index_model.py
+++ b/tests/unit/models/test_index_model.py
@@ -5,7 +5,7 @@
     ServerlessSpec,
     DeletionProtection,
 )
-from pinecone.models import IndexModel
+from pinecone.db_control.models import IndexModel
 
 from pinecone import CloudProvider, AwsRegion
 
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index f33519b6..90ce4c1f 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -1,7 +1,7 @@
 from pinecone import Pinecone
 from pinecone.exceptions.exceptions import PineconeConfigurationError
 from pinecone.config import PineconeConfig
-from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration
+from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration
 import pytest
 import os
 
@@ -103,7 +103,11 @@ def test_config_pool_threads(self):
         pc = Pinecone(
             api_key="test-api-key", host="test-controller-host.pinecone.io", pool_threads=10
         )
-        assert pc.index_api.api_client.pool_threads == 10
+        # DBControl object is created lazily, so we need to access this property
+        # to trigger the setup so we can inspect the config
+        assert pc.db is not None
+
+        assert pc.db._index_api.api_client.pool_threads == 10
         idx = pc.Index(host="my-index-host.pinecone.io", name="my-index-name")
         assert idx._vector_api.api_client.pool_threads == 10
 
@@ -111,8 +115,8 @@ def test_ssl_config_passed_to_index_client(self):
         proxy_headers = make_headers(proxy_basic_auth="asdf")
         pc = Pinecone(api_key="key", ssl_ca_certs="path/to/cert", proxy_headers=proxy_headers)
 
-        assert pc.openapi_config.ssl_ca_cert == "path/to/cert"
-        assert pc.openapi_config.proxy_headers == proxy_headers
+        assert pc._openapi_config.ssl_ca_cert == "path/to/cert"
+        assert pc._openapi_config.proxy_headers == proxy_headers
 
         idx = pc.Index(host="host.pinecone.io")
         assert idx._vector_api.api_client.configuration.ssl_ca_cert == "path/to/cert"
@@ -122,16 +126,16 @@ def test_host_config_not_clobbered_by_index(self):
         proxy_headers = make_headers(proxy_basic_auth="asdf")
         pc = Pinecone(api_key="key", ssl_ca_certs="path/to/cert", proxy_headers=proxy_headers)
 
-        assert pc.openapi_config.ssl_ca_cert == "path/to/cert"
-        assert pc.openapi_config.proxy_headers == proxy_headers
-        assert pc.openapi_config.host == "https://api.pinecone.io"
+        assert pc._openapi_config.ssl_ca_cert == "path/to/cert"
+        assert pc._openapi_config.proxy_headers == proxy_headers
+        assert pc._openapi_config.host == "https://api.pinecone.io"
 
         idx = pc.Index(host="host.pinecone.io")
         assert idx._vector_api.api_client.configuration.ssl_ca_cert == "path/to/cert"
         assert idx._vector_api.api_client.configuration.proxy_headers == proxy_headers
         assert idx._vector_api.api_client.configuration.host == "https://host.pinecone.io"
 
-        assert pc.openapi_config.host == "https://api.pinecone.io"
+        assert pc._openapi_config.host == "https://api.pinecone.io"
 
     def test_proxy_config(self):
         pc = Pinecone(
@@ -140,11 +144,15 @@ def test_proxy_config(self):
             ssl_ca_certs="path/to/cert-bundle.pem",
         )
 
-        assert pc.config.proxy_url == "http://localhost:8080"
-        assert pc.config.ssl_ca_certs == "path/to/cert-bundle.pem"
+        assert pc._config.proxy_url == "http://localhost:8080"
+        assert pc._config.ssl_ca_certs == "path/to/cert-bundle.pem"
+
+        assert pc._openapi_config.proxy == "http://localhost:8080"
+        assert pc._openapi_config.ssl_ca_cert == "path/to/cert-bundle.pem"
 
-        assert pc.openapi_config.proxy == "http://localhost:8080"
-        assert pc.openapi_config.ssl_ca_cert == "path/to/cert-bundle.pem"
+        # DBControl object is created lazily, so we need to access this property
+        # to trigger the setup so we can inspect the config
+        assert pc.db is not None
 
-        assert pc.index_api.api_client.configuration.proxy == "http://localhost:8080"
-        assert pc.index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem"
+        assert pc.db._index_api.api_client.configuration.proxy == "http://localhost:8080"
+        assert pc.db._index_api.api_client.configuration.ssl_ca_cert == "path/to/cert-bundle.pem"
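Several of the updated assertions above touch pc.db before inspecting pc.db._index_api because the DBControl object is now built on first access rather than in Pinecone.__init__. Reduced to its core, this is a cached lazy property; the names below are illustrative, not the SDK's internals:

class LazyHolder:
    def __init__(self):
        self._db = None  # nothing expensive is constructed yet

    @property
    def db(self):
        # Build the sub-client on first access, then reuse it.
        if self._db is None:
            self._db = object()  # stand-in for the real client setup
        return self._db

holder = LazyHolder()
assert holder.db is holder.db  # same cached instance on every access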
diff --git a/tests/unit/test_config_builder.py b/tests/unit/test_config_builder.py
index 3122c080..7307f153 100644
--- a/tests/unit/test_config_builder.py
+++ b/tests/unit/test_config_builder.py
@@ -1,6 +1,6 @@
 import pytest
 
-from pinecone.openapi_support.configuration import Configuration as OpenApiConfiguration
+from pinecone.config.openapi_configuration import Configuration as OpenApiConfiguration
 from pinecone.config import ConfigBuilder
 from pinecone import PineconeConfigurationError
 
diff --git a/tests/unit/test_control.py b/tests/unit/test_control.py
index c0b909dd..6cce0f92 100644
--- a/tests/unit/test_control.py
+++ b/tests/unit/test_control.py
@@ -77,44 +77,48 @@ def index_list_response():
 
 
 class TestControl:
-    def test_plugins_are_installed(self):
+    def test_plugins_are_lazily_loaded(self):
         with patch.object(PluginAware, "load_plugins") as mock_install_plugins:
-            Pinecone(api_key="asdf")
+            pc = Pinecone(api_key="asdf")
+            mock_install_plugins.assert_not_called()
+            with pytest.raises(AttributeError):
+                pc.foo()  # Accessing a non-existent attribute should raise an AttributeError after PluginAware installs any applicable plugins
             mock_install_plugins.assert_called_once()
 
     def test_default_host(self):
         p = Pinecone(api_key="123-456-789")
-        assert p.index_api.api_client.configuration.host == "https://api.pinecone.io"
+        assert p.db._index_api.api_client.configuration.host == "https://api.pinecone.io"
 
     def test_passing_host(self):
         p = Pinecone(api_key="123-456-789", host="my-host.pinecone.io")
-        assert p.index_api.api_client.configuration.host == "https://my-host.pinecone.io"
+        assert p.db._index_api.api_client.configuration.host == "https://my-host.pinecone.io"
 
     def test_passing_additional_headers(self):
         extras = {"header1": "my-value", "header2": "my-value2"}
         p = Pinecone(api_key="123-456-789", additional_headers=extras)
 
         for key, value in extras.items():
-            assert p.index_api.api_client.default_headers[key] == value
-        assert "User-Agent" in p.index_api.api_client.default_headers
-        assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers
-        assert "header1" in p.index_api.api_client.default_headers
-        assert "header2" in p.index_api.api_client.default_headers
-        assert len(p.index_api.api_client.default_headers) == 4
+            assert p.db._index_api.api_client.default_headers[key] == value
+        assert "User-Agent" in p.db._index_api.api_client.default_headers
+        assert "X-Pinecone-API-Version" in p.db._index_api.api_client.default_headers
+        assert "header1" in p.db._index_api.api_client.default_headers
+        assert "header2" in p.db._index_api.api_client.default_headers
+        assert len(p.db._index_api.api_client.default_headers) == 4
 
     def test_overwrite_useragent(self):
         # This doesn't seem like a common use case, but we may want to allow this
         # when embedding the client in other pinecone tools such as canopy.
         extras = {"User-Agent": "test-user-agent"}
         p = Pinecone(api_key="123-456-789", additional_headers=extras)
-        assert "X-Pinecone-API-Version" in p.index_api.api_client.default_headers
-        assert p.index_api.api_client.default_headers["User-Agent"] == "test-user-agent"
-        assert len(p.index_api.api_client.default_headers) == 2
+        assert "X-Pinecone-API-Version" in p.db._index_api.api_client.default_headers
+        assert p.db._index_api.api_client.default_headers["User-Agent"] == "test-user-agent"
+        assert len(p.db._index_api.api_client.default_headers) == 2
 
     def test_set_source_tag_in_useragent(self):
         p = Pinecone(api_key="123-456-789", source_tag="test_source_tag")
         assert (
-            re.search(r"source_tag=test_source_tag", p.index_api.api_client.user_agent) is not None
+            re.search(r"source_tag=test_source_tag", p.db._index_api.api_client.user_agent)
+            is not None
         )
 
     @pytest.mark.parametrize(
@@ -146,8 +150,8 @@ def test_create_index_with_timeout(
         expected_sleep_calls,
     ):
         p = Pinecone(api_key="123-456-789")
-        mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses)
-        mocker.patch.object(p.index_api, "create_index")
+        mocker.patch.object(p.db._index_api, "describe_index", side_effect=describe_index_responses)
+        mocker.patch.object(p.db._index_api, "create_index")
         mocker.patch("time.sleep")
 
         p.create_index(
@@ -157,8 +161,8 @@ def test_create_index_with_timeout(
             timeout=timeout_value,
         )
 
-        assert p.index_api.create_index.call_count == 1
-        assert p.index_api.describe_index.call_count == expected_describe_index_calls
+        assert p.db._index_api.create_index.call_count == 1
+        assert p.db._index_api.describe_index.call_count == expected_describe_index_calls
         assert time.sleep.call_count == expected_sleep_calls
 
     @pytest.mark.parametrize(
@@ -207,7 +211,7 @@ def test_create_index_with_spec_dictionary(self, mocker, index_spec):
         p = Pinecone(api_key="123-456-789")
 
         mock_api = MagicMock()
-        mocker.patch.object(p, "index_api", mock_api)
+        mocker.patch.object(p.db, "_index_api", mock_api)
 
         p.create_index(name="my-index", dimension=10, spec=index_spec)
 
@@ -242,8 +246,8 @@ def test_create_index_from_source_collection(
        expected_sleep_calls,
    ):
        p = Pinecone(api_key="123-456-789")
-       mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_responses)
-       mocker.patch.object(p.index_api, "create_index")
+       mocker.patch.object(p.db._index_api, "describe_index", side_effect=describe_index_responses)
+       mocker.patch.object(p.db._index_api, "create_index")
        mocker.patch("time.sleep")
 
        p.create_index(
@@ -253,17 +257,19 @@ def test_create_index_from_source_collection(
            timeout=timeout_value,
        )
 
-       assert p.index_api.create_index.call_count == 1
-       assert p.index_api.describe_index.call_count == expected_describe_index_calls
+       assert p.db._index_api.create_index.call_count == 1
+       assert p.db._index_api.describe_index.call_count == expected_describe_index_calls
        assert time.sleep.call_count == expected_sleep_calls
 
    def test_create_index_when_timeout_exceeded(self, mocker):
        with pytest.raises(TimeoutError):
            p = Pinecone(api_key="123-456-789")
-           mocker.patch.object(p.index_api, "create_index")
+           mocker.patch.object(p.db._index_api, "create_index")
 
            describe_index_response = [description_with_status(False)] * 5
-           mocker.patch.object(p.index_api, "describe_index", side_effect=describe_index_response)
+           mocker.patch.object(
+               p.db._index_api, "describe_index", side_effect=describe_index_response
+           )
            mocker.patch("time.sleep")
 
            p.create_index(
@@ -273,7 +279,7 @@ def test_create_index_when_timeout_exceeded(self, mocker):
 
     def test_list_indexes_returns_iterable(self, mocker, index_list_response):
         p = Pinecone(api_key="123-456-789")
-        mocker.patch.object(p.index_api, "list_indexes", side_effect=[index_list_response])
+        mocker.patch.object(p.db._index_api, "list_indexes", side_effect=[index_list_response])
 
         response = p.list_indexes()
         assert [i.name for i in response] == ["index1", "index2", "index3"]
diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py
index 72ed7422..6e880016 100644
--- a/tests/unit/test_index.py
+++ b/tests/unit/test_index.py
@@ -1,7 +1,7 @@
 import pandas as pd
 import pytest
 
-from pinecone.data import _Index
+from pinecone.db_data import _Index
 import pinecone.core.openapi.db_data.models as oai
 from pinecone import QueryResponse, UpsertResponse, Vector
 
diff --git a/tests/unit/test_index_initialization.py b/tests/unit/test_index_initialization.py
index 3d10d636..e20e3f78 100644
--- a/tests/unit/test_index_initialization.py
+++ b/tests/unit/test_index_initialization.py
@@ -51,5 +51,6 @@ def test_overwrite_useragent(self):
     def test_set_source_tag(self):
         pc = Pinecone(api_key="123-456-789", source_tag="test_source_tag")
         assert (
-            re.search(r"source_tag=test_source_tag", pc.index_api.api_client.user_agent) is not None
+            re.search(r"source_tag=test_source_tag", pc.db._index_api.api_client.user_agent)
+            is not None
         )
diff --git a/tests/unit/test_plugin_aware.py b/tests/unit/test_plugin_aware.py
new file mode 100644
index 00000000..a2912bfa
--- /dev/null
+++ b/tests/unit/test_plugin_aware.py
@@ -0,0 +1,48 @@
+import pytest
+from pinecone.utils.plugin_aware import PluginAware
+from pinecone.config import Config, OpenApiConfiguration
+
+
+class TestPluginAware:
+    def test_errors_when_required_attributes_are_missing(self):
+        class Foo(PluginAware):
+            def __init__(self):
+                # does not set config, openapi_config, or pool_threads
+                super().__init__()
+
+        with pytest.raises(AttributeError) as e:
+            Foo()
+
+        assert "_config" in str(e.value)
+        assert "_openapi_config" in str(e.value)
+        assert "_pool_threads" in str(e.value)
+
+    def test_correctly_raise_attribute_errors(self):
+        class Foo(PluginAware):
+            def __init__(self):
+                self.config = Config()
+                self._openapi_config = OpenApiConfiguration()
+                self._pool_threads = 1
+
+                super().__init__()
+
+        foo = Foo()
+
+        with pytest.raises(AttributeError) as e:
+            foo.bar()
+
+        assert "bar" in str(e.value)
+
+    def test_plugins_are_lazily_loaded(self):
+        class Pinecone(PluginAware):
+            def __init__(self):
+                self.config = Config()
+                self._openapi_config = OpenApiConfiguration()
+                self._pool_threads = 10
+
+                super().__init__()
+
+        pc = Pinecone()
+        assert "assistant" not in dir(pc)
+
+        assert pc.assistant is not None
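test_plugin_aware.py and the reworked test_plugins_are_lazily_loaded above pin down one contract: plugins are not installed at construction time, but on the first failed attribute lookup, after which a genuinely unknown name still raises AttributeError. A minimal sketch of that contract via __getattr__ (illustrative, not the SDK's actual implementation):

class LazyPluginLoader:
    def __init__(self):
        self._plugins_loaded = False

    def load_plugins(self):
        # Stand-in for plugin discovery; a real loader would setattr()
        # plugin entry points onto the instance here.
        self._plugins_loaded = True

    def __getattr__(self, name):
        # Invoked only when normal lookup fails: install plugins once,
        # then retry the lookup; re-raise AttributeError if still missing.
        if not self._plugins_loaded:
            self.load_plugins()
            return getattr(self, name)
        raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")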
diff --git a/tests/unit/test_query_results_aggregator.py b/tests/unit/test_query_results_aggregator.py
index b40a11d2..d3c97f87 100644
--- a/tests/unit/test_query_results_aggregator.py
+++ b/tests/unit/test_query_results_aggregator.py
@@ -1,4 +1,4 @@
-from pinecone.data.query_results_aggregator import (
+from pinecone.db_data.query_results_aggregator import (
     QueryResultsAggregator,
     QueryResultsAggregatorInvalidTopKError,
 )
diff --git a/tests/unit/utils/test_docs_links.py b/tests/unit/utils/test_docs_links.py
index 478ba3b2..c1d01b21 100644
--- a/tests/unit/utils/test_docs_links.py
+++ b/tests/unit/utils/test_docs_links.py
@@ -1,11 +1,17 @@
 import pytest
 import requests
 from pinecone.utils import docslinks
+from pinecone import __version__
 
 urls = list(docslinks.values())
 
 
 @pytest.mark.parametrize("url", urls)
 def test_valid_links(url):
-    response = requests.get(url)
-    assert response.status_code == 200, f"Docs link is invalid: {url}"
+    if isinstance(url, str):
+        response = requests.get(url)
+        assert response.status_code == 200, f"Docs link is invalid: {url}"
+    else:
+        versioned_url = url(__version__)
+        response = requests.get(versioned_url)
+        assert response.status_code == 200, f"Docs link is invalid: {versioned_url}"
diff --git a/tests/unit_grpc/test_grpc_index_initialization.py b/tests/unit_grpc/test_grpc_index_initialization.py
index 710c3f26..b0b64250 100644
--- a/tests/unit_grpc/test_grpc_index_initialization.py
+++ b/tests/unit_grpc/test_grpc_index_initialization.py
@@ -43,7 +43,7 @@ def test_config_passed_when_target_by_name(self):
 
         # Set this state in the host store to skip network call
         # to find host for name
-        pc.index_host_store.set_host(pc.config, "my-index", "myhost")
+        pc.db.index._index_host_store.set_host(pc._config, "my-index", "myhost")
 
         config = GRPCClientConfig(timeout=10, secure=False)
         index = pc.Index(name="my-index", grpc_config=config)
@@ -88,5 +88,6 @@ def test_config_passed_when_target_by_host_and_port(self):
     def test_config_passes_source_tag_when_set(self):
         pc = PineconeGRPC(api_key="YOUR_API_KEY", source_tag="my_source_tag")
         assert (
-            re.search(r"source_tag=my_source_tag", pc.index_api.api_client.user_agent) is not None
+            re.search(r"source_tag=my_source_tag", pc.db._index_api.api_client.user_agent)
+            is not None
         )
diff --git a/tests/upgrade/test_all.py b/tests/upgrade/test_all.py
new file mode 100644
index 00000000..acabf620
--- /dev/null
+++ b/tests/upgrade/test_all.py
@@ -0,0 +1,28 @@
+class TestAll:
+    def test_all_is_complete(self):
+        """Test that __all__ is complete and accurate."""
+        # Import the module
+        import pinecone
+
+        # Get all public names (those that don't start with _)
+        public_names = {name for name in dir(pinecone) if not name.startswith("_")}
+
+        # Get __all__ if it exists, otherwise empty set
+        all_names = set(getattr(pinecone, "__all__", []))
+
+        # Check that __all__ exists
+        assert hasattr(pinecone, "__all__"), "Module should have __all__ defined"
+
+        # Check that all names in __all__ are actually importable
+        for name in all_names:
+            assert getattr(pinecone, name) is not None, f"Name {name} in __all__ is not importable"
+
+        # Check that all public names are in __all__
+        missing_from_all = public_names - all_names
+        for name in missing_from_all:
+            print(f"Public name {name} is not in __all__")
+        assert not missing_from_all, f"Public names not in __all__: {missing_from_all}"
+
+        # Check that __all__ doesn't contain any private names
+        private_in_all = {name for name in all_names if name.startswith("_")}
+        assert not private_in_all, f"Private names in __all__: {private_in_all}"
diff --git a/tests/upgrade/test_reorganization.py b/tests/upgrade/test_reorganization.py
new file mode 100644
index 00000000..331681b7
--- /dev/null
+++ b/tests/upgrade/test_reorganization.py
@@ -0,0 +1,19 @@
+import pytest
+
+
+class TestReorganization:
+    def test_data(self):
+        with pytest.warns(DeprecationWarning) as warning_info:
+            from pinecone.data import Index
+
+        assert Index is not None
+        assert len(warning_info) > 0
+        assert "has moved to" in str(warning_info[0].message)
+
+    def test_config(self):
+        with pytest.warns(DeprecationWarning) as warning_info:
+            from pinecone.config import PineconeConfig
+
+        assert PineconeConfig is not None
+        assert len(warning_info) > 0
+        assert "has moved to" in str(warning_info[0].message)
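test_reorganization.py asserts that imports from the old pinecone.data and pinecone.config locations still resolve but emit a DeprecationWarning naming the new location. The usual mechanism for this is a module-level __getattr__ (PEP 562) in the legacy module; a hedged sketch of what such a shim generally looks like, with hypothetical module names rather than the SDK's exact code:

# hypothetical legacy module, e.g. old_pkg/data.py
import warnings

_MOVED = {"Index": ("new_pkg.db_data", "Index")}

def __getattr__(name):
    if name in _MOVED:
        target_module, attr = _MOVED[name]
        warnings.warn(
            f"{name} has moved to {target_module}. Please update your imports.",
            DeprecationWarning,
            stacklevel=2,
        )
        module = __import__(target_module, fromlist=[attr])
        return getattr(module, attr)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")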
diff --git a/tests/upgrade/test_v6_upgrade.py b/tests/upgrade/test_v6_upgrade.py
new file mode 100644
index 00000000..6532f65f
--- /dev/null
+++ b/tests/upgrade/test_v6_upgrade.py
@@ -0,0 +1,263 @@
+import pinecone
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class TestExpectedImports_UpgradeFromV6:
+    def test_mapped_data_imports(self):
+        data_imports = [
+            "Vector",
+            "QueryRequest",
+            "FetchResponse",
+            "DeleteRequest",
+            "DescribeIndexStatsRequest",
+            "DescribeIndexStatsResponse",
+            "RpcStatus",
+            "ScoredVector",
+            "ServiceException",
+            "SingleQueryResults",
+            "QueryResponse",
+            "RerankModel",
+            "SearchQuery",
+            "SearchQueryVector",
+            "SearchRerank",
+            "UpsertResponse",
+            "UpdateRequest",
+        ]
+
+        control_imports = [
+            "CollectionDescription",
+            "CollectionList",
+            "ServerlessSpec",
+            "ServerlessSpecDefinition",
+            "PodSpec",
+            "PodSpecDefinition",
+            # 'ForbiddenException',
+            # 'ImportErrorMode',
+            # 'Index',
+            "IndexList",
+            "IndexModel",
+            # 'ListConversionException',
+            # 'MetadataDictionaryExpectedError',
+            # 'NotFoundException',
+        ]
+
+        config_imports = [
+            "Config",
+            "ConfigBuilder",
+            "PineconeConfig",
+            "PineconeConfigurationError",
+            "PineconeException",
+            "PineconeProtocolError",
+            "PineconeApiAttributeError",
+            "PineconeApiException",
+        ]
+
+        exception_imports = [
+            "PineconeConfigurationError",
+            "PineconeProtocolError",
+            "PineconeException",
+            "PineconeApiAttributeError",
+            "PineconeApiTypeError",
+            "PineconeApiValueError",
+            "PineconeApiKeyError",
+            "PineconeApiException",
+            "NotFoundException",
+            "UnauthorizedException",
+            "ForbiddenException",
+            "ServiceException",
+            "ListConversionException",
+        ]
+        mapped_imports = data_imports + control_imports + config_imports + exception_imports
+
+        for import_name in mapped_imports:
+            assert hasattr(pinecone, import_name), f"Import {import_name} not found in pinecone"
+
+    def test_v6_upgrade_root_imports(self):
+        v6_dir_items = [
+            "CollectionDescription",
+            "CollectionList",
+            "Config",
+            "ConfigBuilder",
+            "DeleteRequest",
+            "DescribeIndexStatsRequest",
+            "DescribeIndexStatsResponse",
+            "FetchResponse",
+            "ForbiddenException",
+            "ImportErrorMode",
+            "Index",
+            "IndexList",
+            "IndexModel",
+            "ListConversionException",
+            "MetadataDictionaryExpectedError",
+            "NotFoundException",
+            "Pinecone",
+            "PineconeApiAttributeError",
+            "PineconeApiException",
+            "PineconeApiKeyError",
+            "PineconeApiTypeError",
+            "PineconeApiValueError",
+            "PineconeConfig",
+            "PineconeConfigurationError",
+            "PineconeException",
+            "PineconeProtocolError",
+            "PodSpec",
+            "PodSpecDefinition",
+            "QueryRequest",
+            "QueryResponse",
+            "RpcStatus",
+            "ScoredVector",
+            "ServerlessSpec",
+            "ServerlessSpecDefinition",
+            "ServiceException",
+            "SingleQueryResults",
+            "SparseValues",
+            "SparseValuesDictionaryExpectedError",
+            "SparseValuesMissingKeysError",
+            "SparseValuesTypeError",
+            "TqdmExperimentalWarning",
+            "UnauthorizedException",
+            "UpdateRequest",
+            "UpsertRequest",
+            "UpsertResponse",
+            "Vector",
+            "VectorDictionaryExcessKeysError",
+            "VectorDictionaryMissingKeysError",
+            "VectorTupleLengthError",
+            "__builtins__",
+            "__cached__",
+            "__doc__",
+            "__file__",
+            "__loader__",
+            "__name__",
+            "__package__",
+            "__path__",
+            "__spec__",
+            "__version__",
+            "config",
+            "configure_index",
+            "control",
+            "core",
+            "core_ea",
+            "create_collection",
+            "create_index",
+            "data",
+            "delete_collection",
+            "delete_index",
+            "deprecation_warnings",
+            "describe_collection",
+            "describe_index",
+            "errors",
+            "exceptions",
+            "features",
+            "index",
+            "index_host_store",
+            "init",
+            "install_repr_overrides",
"langchain_import_warnings", + "list_collections", + "list_indexes", + "logging", + "models", + "openapi", + "os", + "pinecone", + "pinecone_config", + "repr_overrides", + "scale_index", + "sparse_vector_factory", + "utils", + "vector_factory", + "warnings", + ] + + intentionally_removed_items = ["os"] + + expected_items = [item for item in v6_dir_items if item not in intentionally_removed_items] + + missing_items = [] + for item in expected_items: + if not hasattr(pinecone, item): + missing_items.append(item) + logger.debug(f"Exported: ❌ {item}") + else: + logger.debug(f"Exported: ✅ {item}") + + extra_items = [] + for item in intentionally_removed_items: + if hasattr(pinecone, item): + extra_items.append(item) + logger.debug(f"Removed: ❌ {item}") + else: + logger.debug(f"Removed: ✅ {item}") + + assert len(missing_items) == 0, f"Missing items: {missing_items}" + assert len(extra_items) == 0, f"Extra items: {extra_items}" + + # def test_v6_upgrade_data_imports(self): + # v6_data_dir_items = [ + # "DescribeIndexStatsResponse", + # "EmbedModel", + # "FetchResponse", + # "ImportErrorMode", + # "Index", + # "IndexClientInstantiationError", + # "Inference", + # "InferenceInstantiationError", + # "MetadataDictionaryExpectedError", + # "QueryResponse", + # "RerankModel", + # "SearchQuery", + # "SearchQueryVector", + # "SearchRerank", + # "SparseValues", + # "SparseValuesDictionaryExpectedError", + # "SparseValuesMissingKeysError", + # "SparseValuesTypeError", + # "UpsertResponse", + # "Vector", + # "VectorDictionaryExcessKeysError", + # "VectorDictionaryMissingKeysError", + # "VectorTupleLengthError", + # "_AsyncioInference", + # "_Index", + # "_IndexAsyncio", + # "_Inference", + # "__builtins__", + # "__cached__", + # "__doc__", + # "__file__", + # "__loader__", + # "__name__", + # "__package__", + # "__path__", + # "__spec__", + # "dataclasses", + # "errors", + # "features", + # "fetch_response", + # "import_error", + # "index", + # "index_asyncio", + # "index_asyncio_interface", + # "interfaces", + # "query_results_aggregator", + # "request_factory", + # "search_query", + # "search_query_vector", + # "search_rerank", + # "sparse_values", + # "sparse_values_factory", + # "types", + # "utils", + # "vector", + # "vector_factory", + # ] + + # missing_items = [] + # for item in v6_data_dir_items: + # if item not in dir(pinecone.db_data): + # missing_items.append(item) + + # assert len(missing_items) == 0, f"Missing items: {missing_items}"