From 2cf6a10e64877391242af233fae614b25d041450 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Fri, 19 Jul 2024 12:10:27 +0100 Subject: [PATCH 1/4] Updated milvus client to include backoff strategy on ingestion. Added extra configurations to ease remote benchmark run. --- engine/clients/milvus/__init__.py | 6 --- engine/clients/milvus/config.py | 27 +++++++++++-- engine/clients/milvus/configure.py | 18 ++++----- engine/clients/milvus/search.py | 18 +++------ engine/clients/milvus/upload.py | 37 ++++++++---------- .../configurations/milvus-single-node.json | 39 ++++++++++++------- 6 files changed, 80 insertions(+), 65 deletions(-) diff --git a/engine/clients/milvus/__init__.py b/engine/clients/milvus/__init__.py index 31abe17b..ca400c86 100644 --- a/engine/clients/milvus/__init__.py +++ b/engine/clients/milvus/__init__.py @@ -1,9 +1,3 @@ from engine.clients.milvus.configure import MilvusConfigurator from engine.clients.milvus.search import MilvusSearcher from engine.clients.milvus.upload import MilvusUploader - -__all__ = [ - "MilvusConfigurator", - "MilvusSearcher", - "MilvusUploader", -] diff --git a/engine/clients/milvus/config.py b/engine/clients/milvus/config.py index 48d26ed3..df5acd3d 100644 --- a/engine/clients/milvus/config.py +++ b/engine/clients/milvus/config.py @@ -1,10 +1,13 @@ -from pymilvus import DataType - +from pymilvus import DataType, connections +import os from engine.base_client.distances import Distance MILVUS_COLLECTION_NAME = "Benchmark" MILVUS_DEFAULT_ALIAS = "bench" MILVUS_DEFAULT_PORT = "19530" +MILVUS_PASS = os.getenv("MILVUS_PASS", "") +MILVUS_USER = os.getenv("MILVUS_USER", "") +MILVUS_PORT = os.getenv("MILVUS_PORT", MILVUS_DEFAULT_PORT) DISTANCE_MAPPING = { Distance.L2: "L2", @@ -23,5 +26,23 @@ DataType.INT64: 0, DataType.VARCHAR: "---MILVUS DOES NOT ACCEPT EMPTY STRINGS---", DataType.FLOAT: 0.0, - DataType.DOUBLE: 0.0, } + + +def get_milvus_client(connection_params: dict, host: str, alias: str): + h = "" + uri = "" + if host.startswith("http"): + uri = host + else: + h = host + client = connections.connect( + alias=alias, + host=h, + uri=uri, + port=MILVUS_PORT, + user=MILVUS_USER, + password=MILVUS_PASS, + **connection_params + ) + return client diff --git a/engine/clients/milvus/configure.py b/engine/clients/milvus/configure.py index 85f2c774..b8fea571 100644 --- a/engine/clients/milvus/configure.py +++ b/engine/clients/milvus/configure.py @@ -17,7 +17,7 @@ DTYPE_EXTRAS, MILVUS_COLLECTION_NAME, MILVUS_DEFAULT_ALIAS, - MILVUS_DEFAULT_PORT, + get_milvus_client, ) @@ -32,20 +32,18 @@ class MilvusConfigurator(BaseConfigurator): def __init__(self, host, collection_params: dict, connection_params: dict): super().__init__(host, collection_params, connection_params) - self.client = connections.connect( - alias=MILVUS_DEFAULT_ALIAS, - host=host, - port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)), - **connection_params, - ) + self.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS) print("established connection") def clean(self): - try: + if utility.has_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS): + print("dropping collection named {MILVUS_COLLECTION_NAME}...") utility.drop_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS) + print("dropped collection named {MILVUS_COLLECTION_NAME}...") + assert ( utility.has_collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS) - except MilvusException: - pass + is False + ) def recreate(self, dataset: Dataset, collection_params): idx = FieldSchema( diff --git a/engine/clients/milvus/search.py b/engine/clients/milvus/search.py index 1694fc37..f93a8fea 100644 --- a/engine/clients/milvus/search.py +++ b/engine/clients/milvus/search.py @@ -3,13 +3,12 @@ from pymilvus import Collection, connections -from dataset_reader.base_reader import Query from engine.base_client.search import BaseSearcher from engine.clients.milvus.config import ( DISTANCE_MAPPING, MILVUS_COLLECTION_NAME, MILVUS_DEFAULT_ALIAS, - MILVUS_DEFAULT_PORT, + get_milvus_client, ) from engine.clients.milvus.parser import MilvusConditionParser @@ -23,12 +22,7 @@ class MilvusSearcher(BaseSearcher): @classmethod def init_client(cls, host, distance, connection_params: dict, search_params: dict): - cls.client = connections.connect( - alias=MILVUS_DEFAULT_ALIAS, - host=host, - port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)), - **connection_params - ) + cls.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS) cls.collection = Collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS) cls.search_params = search_params cls.distance = DISTANCE_MAPPING[distance] @@ -38,15 +32,15 @@ def get_mp_start_method(cls): return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn" @classmethod - def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]: - param = {"metric_type": cls.distance, "params": cls.search_params["config"]} + def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: + param = {"metric_type": cls.distance, "params": cls.search_params["params"]} try: res = cls.collection.search( - data=[query.vector], + data=[vector], anns_field="vector", param=param, limit=top, - expr=cls.parser.parse(query.meta_conditions), + expr=cls.parser.parse(meta_conditions), ) except Exception as e: import ipdb diff --git a/engine/clients/milvus/upload.py b/engine/clients/milvus/upload.py index 8c3768e1..6d0749ca 100644 --- a/engine/clients/milvus/upload.py +++ b/engine/clients/milvus/upload.py @@ -1,5 +1,7 @@ +import logging import multiprocessing as mp -from typing import List +from typing import List, Optional +import backoff from pymilvus import ( Collection, @@ -8,14 +10,13 @@ wait_for_index_building_complete, ) -from dataset_reader.base_reader import Record from engine.base_client.upload import BaseUploader from engine.clients.milvus.config import ( DISTANCE_MAPPING, DTYPE_DEFAULT, MILVUS_COLLECTION_NAME, MILVUS_DEFAULT_ALIAS, - MILVUS_DEFAULT_PORT, + get_milvus_client, ) @@ -31,37 +32,33 @@ def get_mp_start_method(cls): @classmethod def init_client(cls, host, distance, connection_params, upload_params): - cls.client = connections.connect( - alias=MILVUS_DEFAULT_ALIAS, - host=host, - port=str(connection_params.get("port", MILVUS_DEFAULT_PORT)), - **connection_params - ) + cls.client = get_milvus_client(connection_params, host, MILVUS_DEFAULT_ALIAS) cls.collection = Collection(MILVUS_COLLECTION_NAME, using=MILVUS_DEFAULT_ALIAS) cls.upload_params = upload_params cls.distance = DISTANCE_MAPPING[distance] @classmethod - def upload_batch(cls, batch: List[Record]): - has_metadata = any(record.metadata for record in batch) - if has_metadata: + def upload_batch( + cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]] + ): + if metadata is not None: field_values = [ [ - record.metadata.get(field_schema.name) - or DTYPE_DEFAULT[field_schema.dtype] - for record in batch + payload.get(field_schema.name) or DTYPE_DEFAULT[field_schema.dtype] + for payload in metadata ] for field_schema in cls.collection.schema.fields if field_schema.name not in ["id", "vector"] ] else: field_values = [] + cls.upload_with_backoff(field_values, ids, vectors) - ids, vectors = [], [] - for record in batch: - ids.append(record.id) - vectors.append(record.vector) - + @classmethod + @backoff.on_exception( + backoff.expo, MilvusException, max_time=600, backoff_log_level=logging.WARN + ) + def upload_with_backoff(cls, field_values, ids, vectors): cls.collection.insert([ids, vectors] + field_values) @classmethod diff --git a/experiments/configurations/milvus-single-node.json b/experiments/configurations/milvus-single-node.json index bffc0a1c..175965ef 100644 --- a/experiments/configurations/milvus-single-node.json +++ b/experiments/configurations/milvus-single-node.json @@ -5,19 +5,30 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 100, "M": 16 } } }, + { + "name": "milvus-m-16-ef-64", + "engine": "milvus", + "connection_params": {}, + "collection_params": {}, + "search_params": [ + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } + ], + "upload_params": { "parallel": 16, "index_params": { "efConstruction": 64, "M": 16 } } + }, { "name": "milvus-m-16-ef-128", "engine": "milvus", "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 128, "M": 16 } } }, @@ -27,8 +38,8 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 128, "M": 32 } } }, @@ -38,8 +49,8 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 256, "M": 32 } } }, @@ -49,8 +60,8 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 512, "M": 32 } } }, @@ -60,8 +71,8 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 256, "M": 64 } } }, @@ -71,8 +82,8 @@ "connection_params": {}, "collection_params": {}, "search_params": [ - { "parallel": 1, "config": { "ef": 128 } }, { "parallel": 1, "config": { "ef": 256 } }, { "parallel": 1, "config": { "ef": 512 } }, - { "parallel": 100, "config": { "ef": 128 } }, { "parallel": 100, "config": { "ef": 256 } }, { "parallel": 100, "config": { "ef": 512 } } + { "parallel": 1, "params": { "ef": 64 } }, { "parallel": 1, "params": { "ef": 128 } }, { "parallel": 1, "params": { "ef": 256 } }, { "parallel": 1, "params": { "ef": 512 } }, + { "parallel": 100, "params": { "ef": 64 } }, { "parallel": 100, "params": { "ef": 128 } }, { "parallel": 100, "params": { "ef": 256 } }, { "parallel": 100, "params": { "ef": 512 } } ], "upload_params": { "parallel": 16, "index_params": { "efConstruction": 512, "M": 64 } } } From 190e0ab0e71cba78b2a9a14d4921752e3e2ef955 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Fri, 19 Jul 2024 12:18:03 +0100 Subject: [PATCH 2/4] Fixes per linter: ruff and isort --- engine/clients/milvus/__init__.py | 6 ++++++ engine/clients/milvus/config.py | 5 ++++- engine/clients/milvus/configure.py | 2 -- engine/clients/milvus/search.py | 9 +++++---- engine/clients/milvus/upload.py | 23 ++++++++++++++--------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/engine/clients/milvus/__init__.py b/engine/clients/milvus/__init__.py index ca400c86..31abe17b 100644 --- a/engine/clients/milvus/__init__.py +++ b/engine/clients/milvus/__init__.py @@ -1,3 +1,9 @@ from engine.clients.milvus.configure import MilvusConfigurator from engine.clients.milvus.search import MilvusSearcher from engine.clients.milvus.upload import MilvusUploader + +__all__ = [ + "MilvusConfigurator", + "MilvusSearcher", + "MilvusUploader", +] diff --git a/engine/clients/milvus/config.py b/engine/clients/milvus/config.py index df5acd3d..0c4da2ea 100644 --- a/engine/clients/milvus/config.py +++ b/engine/clients/milvus/config.py @@ -1,5 +1,7 @@ -from pymilvus import DataType, connections import os + +from pymilvus import DataType, connections + from engine.base_client.distances import Distance MILVUS_COLLECTION_NAME = "Benchmark" @@ -26,6 +28,7 @@ DataType.INT64: 0, DataType.VARCHAR: "---MILVUS DOES NOT ACCEPT EMPTY STRINGS---", DataType.FLOAT: 0.0, + DataType.DOUBLE: 0.0, } diff --git a/engine/clients/milvus/configure.py b/engine/clients/milvus/configure.py index b8fea571..1dba4001 100644 --- a/engine/clients/milvus/configure.py +++ b/engine/clients/milvus/configure.py @@ -3,8 +3,6 @@ CollectionSchema, DataType, FieldSchema, - MilvusException, - connections, ) from pymilvus.exceptions import DataTypeNotSupportException from pymilvus.orm import utility diff --git a/engine/clients/milvus/search.py b/engine/clients/milvus/search.py index f93a8fea..a4204146 100644 --- a/engine/clients/milvus/search.py +++ b/engine/clients/milvus/search.py @@ -3,6 +3,7 @@ from pymilvus import Collection, connections +from dataset_reader.base_reader import Query from engine.base_client.search import BaseSearcher from engine.clients.milvus.config import ( DISTANCE_MAPPING, @@ -32,15 +33,15 @@ def get_mp_start_method(cls): return "forkserver" if "forkserver" in mp.get_all_start_methods() else "spawn" @classmethod - def search_one(cls, vector, meta_conditions, top) -> List[Tuple[int, float]]: - param = {"metric_type": cls.distance, "params": cls.search_params["params"]} + def search_one(cls, query: Query, top: int) -> List[Tuple[int, float]]: + param = {"metric_type": cls.distance, "params": cls.search_params["config"]} try: res = cls.collection.search( - data=[vector], + data=[query.vector], anns_field="vector", param=param, limit=top, - expr=cls.parser.parse(meta_conditions), + expr=cls.parser.parse(query.meta_conditions), ) except Exception as e: import ipdb diff --git a/engine/clients/milvus/upload.py b/engine/clients/milvus/upload.py index 6d0749ca..233d0cb5 100644 --- a/engine/clients/milvus/upload.py +++ b/engine/clients/milvus/upload.py @@ -1,15 +1,15 @@ import logging import multiprocessing as mp -from typing import List, Optional -import backoff +from typing import List +import backoff from pymilvus import ( Collection, MilvusException, - connections, wait_for_index_building_complete, ) +from dataset_reader.base_reader import Record from engine.base_client.upload import BaseUploader from engine.clients.milvus.config import ( DISTANCE_MAPPING, @@ -38,20 +38,25 @@ def init_client(cls, host, distance, connection_params, upload_params): cls.distance = DISTANCE_MAPPING[distance] @classmethod - def upload_batch( - cls, ids: List[int], vectors: List[list], metadata: Optional[List[dict]] - ): - if metadata is not None: + def upload_batch(cls, batch: List[Record]): + has_metadata = any(record.metadata for record in batch) + if has_metadata: field_values = [ [ - payload.get(field_schema.name) or DTYPE_DEFAULT[field_schema.dtype] - for payload in metadata + record.metadata.get(field_schema.name) + or DTYPE_DEFAULT[field_schema.dtype] + for record in batch ] for field_schema in cls.collection.schema.fields if field_schema.name not in ["id", "vector"] ] else: field_values = [] + + ids, vectors = [], [] + for record in batch: + ids.append(record.id) + vectors.append(record.vector) cls.upload_with_backoff(field_values, ids, vectors) @classmethod From b1f4941534dafe97368826af1657b0ce178cb3b3 Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Fri, 19 Jul 2024 12:19:29 +0100 Subject: [PATCH 3/4] Added backoff dependency --- poetry.lock | 29 ++++++++++++++++++++++++++--- pyproject.toml | 1 + 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index e022d87c..2746c4ab 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "annotated-types" @@ -101,6 +101,17 @@ files = [ {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + [[package]] name = "backports-zoneinfo" version = "0.2.1" @@ -957,6 +968,7 @@ python-versions = ">=3.7" files = [ {file = "milvus_lite-2.4.7-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:c828190118b104b05b8c8e0b5a4147811c86b54b8fb67bc2e726ad10fc0b544e"}, {file = "milvus_lite-2.4.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e1537633c39879714fb15082be56a4b97f74c905a6e98e302ec01320561081af"}, + {file = "milvus_lite-2.4.7-py3-none-manylinux2014_aarch64.whl", hash = "sha256:fcb909d38c83f21478ca9cb500c84264f988c69f62715ae9462e966767fb76dd"}, {file = "milvus_lite-2.4.7-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f016474d663045787dddf1c3aad13b7d8b61fd329220318f858184918143dcbf"}, ] @@ -1084,8 +1096,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1648,6 +1660,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1655,8 +1668,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1673,6 +1694,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1680,6 +1702,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2100,4 +2123,4 @@ validators = "0.28.3" [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "ffc0bfbf695bbc2771fd29828e8a76270b5eb01501be717f2464aa573f4a2d5e" +content-hash = "1b9b35e85afb6e6aea3e688427e2a37e8b85fbd963eb32ac780ecf4891a245eb" diff --git a/pyproject.toml b/pyproject.toml index bc8b5c82..2ae4bc26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ opensearch-py = "^2.3.2" tqdm = "^4.66.1" psycopg = {extras = ["binary"], version = "^3.1.17"} pgvector = "^0.2.4" +backoff = "^2.2.1" [tool.poetry.dev-dependencies] pre-commit = "^2.20.0" From 6c582027d1525021a3c39f82b3d94a45d2c5d03a Mon Sep 17 00:00:00 2001 From: filipecosta90 Date: Fri, 19 Jul 2024 12:20:44 +0100 Subject: [PATCH 4/4] Fixes per linter: isort --- engine/clients/milvus/configure.py | 7 +------ engine/clients/milvus/upload.py | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/engine/clients/milvus/configure.py b/engine/clients/milvus/configure.py index 1dba4001..ac3e5c48 100644 --- a/engine/clients/milvus/configure.py +++ b/engine/clients/milvus/configure.py @@ -1,9 +1,4 @@ -from pymilvus import ( - Collection, - CollectionSchema, - DataType, - FieldSchema, -) +from pymilvus import Collection, CollectionSchema, DataType, FieldSchema from pymilvus.exceptions import DataTypeNotSupportException from pymilvus.orm import utility diff --git a/engine/clients/milvus/upload.py b/engine/clients/milvus/upload.py index 233d0cb5..032a0987 100644 --- a/engine/clients/milvus/upload.py +++ b/engine/clients/milvus/upload.py @@ -3,11 +3,7 @@ from typing import List import backoff -from pymilvus import ( - Collection, - MilvusException, - wait_for_index_building_complete, -) +from pymilvus import Collection, MilvusException, wait_for_index_building_complete from dataset_reader.base_reader import Record from engine.base_client.upload import BaseUploader