diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 6f6c139a3..579de05d1 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -700,6 +700,7 @@ async def bulk(
             JavaScript: Check out client.helpers.*
             .NET: Check out BulkAllObservable
             PHP: Check out bulk indexing.
+            Ruby: Check out Elasticsearch::Helpers::BulkHelper
             Submitting bulk requests with cURL

             If you're providing text file input to curl, you must use the --data-binary
             flag instead of plain -d.

@@ -6010,7 +6011,7 @@ async def termvectors(
         doc: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         field_statistics: t.Optional[bool] = None,
-        fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        fields: t.Optional[t.Sequence[str]] = None,
         filter: t.Optional[t.Mapping[str, t.Any]] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
diff --git a/elasticsearch/_async/client/cluster.py b/elasticsearch/_async/client/cluster.py
index 8b393a265..760b9a775 100644
--- a/elasticsearch/_async/client/cluster.py
+++ b/elasticsearch/_async/client/cluster.py
@@ -373,8 +373,13 @@ async def get_settings(
         ``_

         :param flat_settings: If `true`, returns settings in flat format.
-        :param include_defaults: If `true`, returns default cluster settings from the
-            local node.
+        :param include_defaults: If `true`, also returns default values for all other
+            cluster settings, reflecting the values in the `elasticsearch.yml` file of
+            one of the nodes in the cluster. If the nodes in your cluster do not all
+            have the same values in their `elasticsearch.yml` config files then the values
+            returned by this API may vary from invocation to invocation and may not reflect
+            the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+            API to fetch the settings for each individual node in your cluster.
         :param master_timeout: Period to wait for a connection to the master node. If
             no response is received before the timeout expires, the request fails and
            returns an error.
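Usage note on the `include_defaults` docstring change above. A minimal sketch (placeholder URL and setting name; the `"defaults"` response section sits alongside `"persistent"` and `"transient"` when the flag is set):

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def show_defaults() -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL
    # As the new docstring warns, defaults are read from one node's
    # elasticsearch.yml, so they may differ between invocations if the
    # nodes are configured inconsistently.
    settings = await client.cluster.get_settings(
        include_defaults=True, flat_settings=True
    )
    print(settings["defaults"]["cluster.name"])
    await client.close()


asyncio.run(show_defaults())
```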
diff --git a/elasticsearch/_async/client/esql.py b/elasticsearch/_async/client/esql.py
index bca1e4255..7b80fc656 100644
--- a/elasticsearch/_async/client/esql.py
+++ b/elasticsearch/_async/client/esql.py
@@ -44,7 +44,7 @@ class EsqlClient(NamespacedClient):
     async def async_query(
         self,
         *,
-        query: t.Optional[str] = None,
+        query: t.Optional[t.Union[str, "ESQLBase"]] = None,
         allow_partial_results: t.Optional[bool] = None,
         columnar: t.Optional[bool] = None,
         delimiter: t.Optional[str] = None,
@@ -107,7 +107,12 @@ async def async_query(
             which has the name of all the columns.
         :param filter: Specify a Query DSL query in the filter parameter to filter the
             set of documents that an ES|QL query runs on.
-        :param format: A short version of the Accept header, for example `json` or `yaml`.
+        :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+            `tsv`, and `txt` formats will return results in a tabular format, excluding
+            other metadata fields from the response. For async requests, nothing will
+            be returned if the async query doesn't finish within the timeout. The query
+            ID and running status are available in the `X-Elasticsearch-Async-Id` and
+            `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
         :param include_ccs_metadata: When set to `true` and performing a cross-cluster
             query, the response will include an extra `_clusters` object with information
             about the clusters that participated in the search along with info such as
@@ -161,7 +166,7 @@ async def async_query(
             __query["pretty"] = pretty
         if not __body:
             if query is not None:
-                __body["query"] = query
+                __body["query"] = str(query)
             if columnar is not None:
                 __body["columnar"] = columnar
             if filter is not None:
@@ -399,7 +404,7 @@ async def async_query_stop(
     async def query(
         self,
         *,
-        query: t.Optional[str] = None,
+        query: t.Optional[t.Union[str, "ESQLBase"]] = None,
         allow_partial_results: t.Optional[bool] = None,
         columnar: t.Optional[bool] = None,
         delimiter: t.Optional[str] = None,
@@ -456,7 +461,9 @@ async def query(
             `all_columns` which has the name of all columns.
         :param filter: Specify a Query DSL query in the filter parameter to filter the
             set of documents that an ES|QL query runs on.
-        :param format: A short version of the Accept header, e.g. json, yaml.
+        :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+            `tsv`, and `txt` formats will return results in a tabular format, excluding
+            other metadata fields from the response.
         :param include_ccs_metadata: When set to `true` and performing a cross-cluster
             query, the response will include an extra `_clusters` object with information
             about the clusters that participated in the search along with info such as
@@ -496,7 +503,7 @@ async def query(
             __query["pretty"] = pretty
         if not __body:
             if query is not None:
-                __body["query"] = query
+                __body["query"] = str(query)
             if columnar is not None:
                 __body["columnar"] = columnar
             if filter is not None:
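Usage note on the esql.py changes above: `query` now also accepts an ES|QL builder object (the `ESQLBase` family), which the client stringifies via `str(query)` when building the request body. A minimal sketch, assuming the builder exported as `elasticsearch.esql.ESQL`; the exact builder methods shown are illustrative:

```python
import asyncio

from elasticsearch import AsyncElasticsearch
from elasticsearch.esql import ESQL  # builder family behind "ESQLBase"


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL

    # Equivalent requests: a raw ES|QL string and a builder object.
    resp1 = await client.esql.query(query="FROM employees | LIMIT 10")
    resp2 = await client.esql.query(query=ESQL.from_("employees").limit(10))
    print(resp1, resp2)
    await client.close()


asyncio.run(main())
```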

diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py
index 0083677f4..60182931e 100644
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -391,21 +391,23 @@ async def put(

@@ -659,6 +661,112 @@ async def put_amazonbedrock(
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    async def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+

+            Create an Amazon SageMaker inference endpoint.
+
+            Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+        ``_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return await self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
diff --git a/elasticsearch/_async/client/sql.py b/elasticsearch/_async/client/sql.py
index 1763739c5..a744af06c 100644
--- a/elasticsearch/_async/client/sql.py
+++ b/elasticsearch/_async/client/sql.py
@@ -283,7 +283,7 @@ async def query(
         keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         keep_on_completion: t.Optional[bool] = None,
         page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
-        params: t.Optional[t.Mapping[str, t.Any]] = None,
+        params: t.Optional[t.Sequence[t.Any]] = None,
        pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
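Usage note for the two API changes above: the new `put_amazonsagemaker` endpoint, and `sql.query`, whose `params` is now typed as a positional sequence (matching the REST API's ordered `?` placeholders) instead of a mapping. A minimal sketch; the endpoint id and the `service_settings` keys are illustrative placeholders, not the real schema:

```python
import asyncio

from elasticsearch import AsyncElasticsearch


async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # placeholder URL

    # Create an inference endpoint backed by the amazon_sagemaker service.
    # The service_settings keys are placeholders; consult the inference API
    # docs for the schema expected by `service_settings.api`.
    await client.inference.put_amazonsagemaker(
        task_type="text_embedding",
        amazonsagemaker_inference_id="my-sagemaker-embeddings",
        service="amazon_sagemaker",
        service_settings={"api": "<api>", "endpoint_name": "<sagemaker-endpoint>"},
    )

    # `params` is a sequence: values bind, in order, to the `?` placeholders.
    resp = await client.sql.query(
        query="SELECT author FROM library WHERE page_count > ?",
        params=[500],
    )
    print(resp)
    await client.close()


asyncio.run(main())
```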
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index 69389fcff..5d1fb8f61 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -698,6 +698,7 @@ def bulk(
             JavaScript: Check out client.helpers.*
             .NET: Check out BulkAllObservable
             PHP: Check out bulk indexing.
+            Ruby: Check out Elasticsearch::Helpers::BulkHelper
             Submitting bulk requests with cURL

             If you're providing text file input to curl, you must use the --data-binary
             flag instead of plain -d.

@@ -6008,7 +6009,7 @@ def termvectors(
         doc: t.Optional[t.Mapping[str, t.Any]] = None,
         error_trace: t.Optional[bool] = None,
         field_statistics: t.Optional[bool] = None,
-        fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        fields: t.Optional[t.Sequence[str]] = None,
         filter: t.Optional[t.Mapping[str, t.Any]] = None,
         filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
         human: t.Optional[bool] = None,
diff --git a/elasticsearch/_sync/client/cluster.py b/elasticsearch/_sync/client/cluster.py
index 77ced5e60..d7322bf5f 100644
--- a/elasticsearch/_sync/client/cluster.py
+++ b/elasticsearch/_sync/client/cluster.py
@@ -373,8 +373,13 @@ def get_settings(
         ``_

         :param flat_settings: If `true`, returns settings in flat format.
-        :param include_defaults: If `true`, returns default cluster settings from the
-            local node.
+        :param include_defaults: If `true`, also returns default values for all other
+            cluster settings, reflecting the values in the `elasticsearch.yml` file of
+            one of the nodes in the cluster. If the nodes in your cluster do not all
+            have the same values in their `elasticsearch.yml` config files then the values
+            returned by this API may vary from invocation to invocation and may not reflect
+            the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+            API to fetch the settings for each individual node in your cluster.
         :param master_timeout: Period to wait for a connection to the master node. If
             no response is received before the timeout expires, the request fails and
             returns an error.
diff --git a/elasticsearch/_sync/client/esql.py b/elasticsearch/_sync/client/esql.py
index e34a26fb8..67a4746ae 100644
--- a/elasticsearch/_sync/client/esql.py
+++ b/elasticsearch/_sync/client/esql.py
@@ -44,7 +44,7 @@ class EsqlClient(NamespacedClient):
     def async_query(
         self,
         *,
-        query: t.Optional[str] = None,
+        query: t.Optional[t.Union[str, "ESQLBase"]] = None,
         allow_partial_results: t.Optional[bool] = None,
         columnar: t.Optional[bool] = None,
         delimiter: t.Optional[str] = None,
@@ -107,7 +107,12 @@ def async_query(
             which has the name of all the columns.
         :param filter: Specify a Query DSL query in the filter parameter to filter the
             set of documents that an ES|QL query runs on.
-        :param format: A short version of the Accept header, for example `json` or `yaml`.
+        :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+            `tsv`, and `txt` formats will return results in a tabular format, excluding
+            other metadata fields from the response. For async requests, nothing will
+            be returned if the async query doesn't finish within the timeout. The query
+            ID and running status are available in the `X-Elasticsearch-Async-Id` and
+            `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
         :param include_ccs_metadata: When set to `true` and performing a cross-cluster
             query, the response will include an extra `_clusters` object with information
             about the clusters that participated in the search along with info such as
@@ -161,7 +166,7 @@ def async_query(
             __query["pretty"] = pretty
         if not __body:
             if query is not None:
-                __body["query"] = query
+                __body["query"] = str(query)
             if columnar is not None:
                 __body["columnar"] = columnar
             if filter is not None:
@@ -399,7 +404,7 @@ def async_query_stop(
     def query(
         self,
         *,
-        query: t.Optional[str] = None,
+        query: t.Optional[t.Union[str, "ESQLBase"]] = None,
         allow_partial_results: t.Optional[bool] = None,
         columnar: t.Optional[bool] = None,
         delimiter: t.Optional[str] = None,
@@ -456,7 +461,9 @@ def query(
             `all_columns` which has the name of all columns.
         :param filter: Specify a Query DSL query in the filter parameter to filter the
             set of documents that an ES|QL query runs on.
-        :param format: A short version of the Accept header, e.g. json, yaml.
+        :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+            `tsv`, and `txt` formats will return results in a tabular format, excluding
+            other metadata fields from the response.
         :param include_ccs_metadata: When set to `true` and performing a cross-cluster
             query, the response will include an extra `_clusters` object with information
             about the clusters that participated in the search along with info such as
@@ -496,7 +503,7 @@ def query(
             __query["pretty"] = pretty
         if not __body:
             if query is not None:
-                __body["query"] = query
+                __body["query"] = str(query)
             if columnar is not None:
                 __body["columnar"] = columnar
             if filter is not None:

diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py
index 1478883e8..c2668ef09 100644
--- a/elasticsearch/_sync/client/inference.py
+++ b/elasticsearch/_sync/client/inference.py
@@ -391,21 +391,23 @@ def put(

@@ -659,6 +661,112 @@ def put_amazonbedrock(
             path_parts=__path_parts,
         )

+    @_rewrite_parameters(
+        body_fields=(
+            "service",
+            "service_settings",
+            "chunking_settings",
+            "task_settings",
+        ),
+    )
+    def put_amazonsagemaker(
+        self,
+        *,
+        task_type: t.Union[
+            str,
+            t.Literal[
+                "chat_completion",
+                "completion",
+                "rerank",
+                "sparse_embedding",
+                "text_embedding",
+            ],
+        ],
+        amazonsagemaker_inference_id: str,
+        service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+        service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        error_trace: t.Optional[bool] = None,
+        filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+        human: t.Optional[bool] = None,
+        pretty: t.Optional[bool] = None,
+        task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+        timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+        body: t.Optional[t.Dict[str, t.Any]] = None,
+    ) -> ObjectApiResponse[t.Any]:
+        """
+        .. raw:: html
+

+            Create an Amazon SageMaker inference endpoint.
+
+            Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+        ``_
+
+        :param task_type: The type of the inference task that the model will perform.
+        :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+        :param service: The type of service supported for the specified task type. In
+            this case, `amazon_sagemaker`.
+        :param service_settings: Settings used to install the inference model. These
+            settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+            you specified.
+        :param chunking_settings: The chunking configuration object.
+        :param task_settings: Settings to configure the inference task. These settings
+            are specific to the task type and `service_settings.api` you specified.
+        :param timeout: Specifies the amount of time to wait for the inference endpoint
+            to be created.
+        """
+        if task_type in SKIP_IN_PATH:
+            raise ValueError("Empty value passed for parameter 'task_type'")
+        if amazonsagemaker_inference_id in SKIP_IN_PATH:
+            raise ValueError(
+                "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+            )
+        if service is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service'")
+        if service_settings is None and body is None:
+            raise ValueError("Empty value passed for parameter 'service_settings'")
+        __path_parts: t.Dict[str, str] = {
+            "task_type": _quote(task_type),
+            "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+        }
+        __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+        __query: t.Dict[str, t.Any] = {}
+        __body: t.Dict[str, t.Any] = body if body is not None else {}
+        if error_trace is not None:
+            __query["error_trace"] = error_trace
+        if filter_path is not None:
+            __query["filter_path"] = filter_path
+        if human is not None:
+            __query["human"] = human
+        if pretty is not None:
+            __query["pretty"] = pretty
+        if timeout is not None:
+            __query["timeout"] = timeout
+        if not __body:
+            if service is not None:
+                __body["service"] = service
+            if service_settings is not None:
+                __body["service_settings"] = service_settings
+            if chunking_settings is not None:
+                __body["chunking_settings"] = chunking_settings
+            if task_settings is not None:
+                __body["task_settings"] = task_settings
+        if not __body:
+            __body = None  # type: ignore[assignment]
+        __headers = {"accept": "application/json"}
+        if __body is not None:
+            __headers["content-type"] = "application/json"
+        return self.perform_request(  # type: ignore[return-value]
+            "PUT",
+            __path,
+            params=__query,
+            headers=__headers,
+            body=__body,
+            endpoint_id="inference.put_amazonsagemaker",
+            path_parts=__path_parts,
+        )
+
     @_rewrite_parameters(
         body_fields=(
             "service",
diff --git a/elasticsearch/_sync/client/sql.py b/elasticsearch/_sync/client/sql.py
index cde458be5..094a1b016 100644
--- a/elasticsearch/_sync/client/sql.py
+++ b/elasticsearch/_sync/client/sql.py
@@ -283,7 +283,7 @@ def query(
         keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
         keep_on_completion: t.Optional[bool] = None,
         page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
-        params: t.Optional[t.Mapping[str, t.Any]] = None,
+        params: t.Optional[t.Sequence[t.Any]] = None,
         pretty: t.Optional[bool] = None,
         query: t.Optional[str] = None,
         request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py
index 3b7a8e8ba..7f1019b56 100644
--- a/elasticsearch/dsl/aggs.py
+++ b/elasticsearch/dsl/aggs.py
@@ -372,6 +372,12 @@ class Boxplot(Agg[_R]):
     :arg compression: Limits the maximum number of nodes used by the
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
         value. By default, documents without a value are ignored.
@@ -384,6 +390,9 @@ def __init__(
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
         script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
@@ -391,6 +400,7 @@
     ):
         super().__init__(
             compression=compression,
+            execution_hint=execution_hint,
             field=field,
             missing=missing,
             script=script,
@@ -1897,6 +1907,12 @@ class MedianAbsoluteDeviation(Agg[_R]):
         underlying TDigest algorithm to `20 * compression`, enabling
         control of memory usage and approximation error. Defaults to
         `1000` if omitted.
+    :arg execution_hint: The default implementation of TDigest is
+        optimized for performance, scaling to millions or even billions of
+        sample values while maintaining acceptable accuracy levels (close
+        to 1% relative error for millions of samples in some cases). To
+        use an implementation optimized for accuracy, set this parameter
+        to high_accuracy instead. Defaults to `default` if omitted.
     :arg format:
     :arg field: The field on which to run the aggregation.
     :arg missing: The value to apply to documents that do not have a
@@ -1910,6 +1926,9 @@ def __init__(
         self,
         *,
         compression: Union[float, "DefaultType"] = DEFAULT,
+        execution_hint: Union[
+            Literal["default", "high_accuracy"], "DefaultType"
+        ] = DEFAULT,
         format: Union[str, "DefaultType"] = DEFAULT,
         field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
         missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
@@ -1918,6 +1937,7 @@
     ):
         super().__init__(
            compression=compression,
+            execution_hint=execution_hint,
             format=format,
             field=field,
             missing=missing,
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index 1aa7a4bca..c33261458 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -3849,6 +3849,14 @@ class SemanticText(Field):
         by using the Update mapping API. Use the Create inference API to
         create the endpoint. If not specified, the inference endpoint
         defined by inference_id will be used at both index and query time.
+    :arg index_options: Settings for index_options that override any
+        defaults used by semantic_text, for example specific quantization
+        settings.
+    :arg chunking_settings: Settings for chunking text into smaller
+        passages. If specified, these will override the chunking settings
+        sent in the inference endpoint associated with inference_id. If
+        chunking settings are updated, they will not be applied to
+        existing documents until they are reindexed.
""" name = "semantic_text" @@ -3859,6 +3867,12 @@ def __init__( meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, inference_id: Union[str, "DefaultType"] = DEFAULT, search_inference_id: Union[str, "DefaultType"] = DEFAULT, + index_options: Union[ + "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType" + ] = DEFAULT, + chunking_settings: Union[ + "types.ChunkingSettings", Dict[str, Any], "DefaultType" + ] = DEFAULT, **kwargs: Any, ): if meta is not DEFAULT: @@ -3867,6 +3881,10 @@ def __init__( kwargs["inference_id"] = inference_id if search_inference_id is not DEFAULT: kwargs["search_inference_id"] = search_inference_id + if index_options is not DEFAULT: + kwargs["index_options"] = index_options + if chunking_settings is not DEFAULT: + kwargs["chunking_settings"] = chunking_settings super().__init__(*args, **kwargs) @@ -4063,6 +4081,9 @@ def __init__( class SparseVector(Field): """ :arg store: + :arg index_options: Additional index options for the sparse vector + field that controls the token pruning behavior of the sparse + vector field. :arg meta: Metadata about the field. :arg properties: :arg ignore_above: @@ -4081,6 +4102,9 @@ def __init__( self, *args: Any, store: Union[bool, "DefaultType"] = DEFAULT, + index_options: Union[ + "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType" + ] = DEFAULT, meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT, ignore_above: Union[int, "DefaultType"] = DEFAULT, @@ -4095,6 +4119,8 @@ def __init__( ): if store is not DEFAULT: kwargs["store"] = store + if index_options is not DEFAULT: + kwargs["index_options"] = index_options if meta is not DEFAULT: kwargs["meta"] = meta if properties is not DEFAULT: diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 2e616f644..b65dd1210 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -170,6 +170,48 @@ def __init__( super().__init__(kwargs) +class ChunkingSettings(AttrDict[Any]): + """ + :arg strategy: (required) The chunking strategy: `sentence` or `word`. + Defaults to `sentence` if omitted. + :arg max_chunk_size: (required) The maximum size of a chunk in words. + This value cannot be higher than `300` or lower than `20` (for + `sentence` strategy) or `10` (for `word` strategy). Defaults to + `250` if omitted. + :arg overlap: The number of overlapping words for chunks. It is + applicable only to a `word` chunking strategy. This value cannot + be higher than half the `max_chunk_size` value. Defaults to `100` + if omitted. + :arg sentence_overlap: The number of overlapping sentences for chunks. + It is applicable only for a `sentence` chunking strategy. It can + be either `1` or `0`. Defaults to `1` if omitted. 
+ """ + + strategy: Union[str, DefaultType] + max_chunk_size: Union[int, DefaultType] + overlap: Union[int, DefaultType] + sentence_overlap: Union[int, DefaultType] + + def __init__( + self, + *, + strategy: Union[str, DefaultType] = DEFAULT, + max_chunk_size: Union[int, DefaultType] = DEFAULT, + overlap: Union[int, DefaultType] = DEFAULT, + sentence_overlap: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if strategy is not DEFAULT: + kwargs["strategy"] = strategy + if max_chunk_size is not DEFAULT: + kwargs["max_chunk_size"] = max_chunk_size + if overlap is not DEFAULT: + kwargs["overlap"] = overlap + if sentence_overlap is not DEFAULT: + kwargs["sentence_overlap"] = sentence_overlap + super().__init__(kwargs) + + class ClassificationInferenceOptions(AttrDict[Any]): """ :arg num_top_classes: Specifies the number of top class predictions to @@ -3119,6 +3161,26 @@ def __init__( super().__init__(kwargs) +class SemanticTextIndexOptions(AttrDict[Any]): + """ + :arg dense_vector: + """ + + dense_vector: Union["DenseVectorIndexOptions", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + dense_vector: Union[ + "DenseVectorIndexOptions", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if dense_vector is not DEFAULT: + kwargs["dense_vector"] = dense_vector + super().__init__(kwargs) + + class ShapeFieldQuery(AttrDict[Any]): """ :arg indexed_shape: Queries using a pre-indexed shape. @@ -3196,10 +3258,15 @@ def __init__( class SourceFilter(AttrDict[Any]): """ - :arg excludes: - :arg includes: + :arg exclude_vectors: If `true`, vector fields are excluded from the + returned source. This option takes precedence over `includes`: + any vector field will remain excluded even if it matches an + `includes` rule. + :arg excludes: A list of fields to exclude from the returned source. + :arg includes: A list of fields to include in the returned source. """ + exclude_vectors: Union[bool, DefaultType] excludes: Union[ Union[str, InstrumentedField], Sequence[Union[str, InstrumentedField]], @@ -3214,6 +3281,7 @@ class SourceFilter(AttrDict[Any]): def __init__( self, *, + exclude_vectors: Union[bool, DefaultType] = DEFAULT, excludes: Union[ Union[str, InstrumentedField], Sequence[Union[str, InstrumentedField]], @@ -3226,6 +3294,8 @@ def __init__( ] = DEFAULT, **kwargs: Any, ): + if exclude_vectors is not DEFAULT: + kwargs["exclude_vectors"] = exclude_vectors if excludes is not DEFAULT: kwargs["excludes"] = str(excludes) if includes is not DEFAULT: @@ -3675,6 +3745,38 @@ def __init__( super().__init__(kwargs) +class SparseVectorIndexOptions(AttrDict[Any]): + """ + :arg prune: Whether to perform pruning, omitting the non-significant + tokens from the query to improve query performance. If prune is + true but the pruning_config is not specified, pruning will occur + but default values will be used. Default: false + :arg pruning_config: Optional pruning configuration. If enabled, this + will omit non-significant tokens from the query in order to + improve query performance. This is only used if prune is set to + true. If prune is set to true but pruning_config is not specified, + default values will be used. 
+ """ + + prune: Union[bool, DefaultType] + pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType] + + def __init__( + self, + *, + prune: Union[bool, DefaultType] = DEFAULT, + pruning_config: Union[ + "TokenPruningConfig", Dict[str, Any], DefaultType + ] = DEFAULT, + **kwargs: Any, + ): + if prune is not DEFAULT: + kwargs["prune"] = prune + if pruning_config is not DEFAULT: + kwargs["pruning_config"] = pruning_config + super().__init__(kwargs) + + class SuggestContext(AttrDict[Any]): """ :arg name: (required) @@ -3713,15 +3815,30 @@ class TDigest(AttrDict[Any]): :arg compression: Limits the maximum number of nodes used by the underlying TDigest algorithm to `20 * compression`, enabling control of memory usage and approximation error. + :arg execution_hint: The default implementation of TDigest is + optimized for performance, scaling to millions or even billions of + sample values while maintaining acceptable accuracy levels (close + to 1% relative error for millions of samples in some cases). To + use an implementation optimized for accuracy, set this parameter + to high_accuracy instead. Defaults to `default` if omitted. """ compression: Union[int, DefaultType] + execution_hint: Union[Literal["default", "high_accuracy"], DefaultType] def __init__( - self, *, compression: Union[int, DefaultType] = DEFAULT, **kwargs: Any + self, + *, + compression: Union[int, DefaultType] = DEFAULT, + execution_hint: Union[ + Literal["default", "high_accuracy"], DefaultType + ] = DEFAULT, + **kwargs: Any, ): if compression is not DEFAULT: kwargs["compression"] = compression + if execution_hint is not DEFAULT: + kwargs["execution_hint"] = execution_hint super().__init__(kwargs) @@ -4444,7 +4561,7 @@ class ArrayPercentilesItem(AttrDict[Any]): :arg value_as_string: """ - key: str + key: float value: Union[float, None] value_as_string: str @@ -5290,7 +5407,9 @@ class HdrPercentileRanksAggregate(AttrDict[Any]): :arg meta: """ - values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + values: Union[ + Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"] + ] meta: Mapping[str, Any] @@ -5300,7 +5419,9 @@ class HdrPercentilesAggregate(AttrDict[Any]): :arg meta: """ - values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + values: Union[ + Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"] + ] meta: Mapping[str, Any] @@ -5809,7 +5930,9 @@ class PercentilesBucketAggregate(AttrDict[Any]): :arg meta: """ - values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + values: Union[ + Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"] + ] meta: Mapping[str, Any] @@ -6344,7 +6467,9 @@ class TDigestPercentileRanksAggregate(AttrDict[Any]): :arg meta: """ - values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + values: Union[ + Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"] + ] meta: Mapping[str, Any] @@ -6354,7 +6479,9 @@ class TDigestPercentilesAggregate(AttrDict[Any]): :arg meta: """ - values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]] + values: Union[ + Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"] + ] meta: Mapping[str, Any]