diff --git a/elasticsearch/_async/client/__init__.py b/elasticsearch/_async/client/__init__.py
index 6f6c139a3..579de05d1 100644
--- a/elasticsearch/_async/client/__init__.py
+++ b/elasticsearch/_async/client/__init__.py
@@ -700,6 +700,7 @@ async def bulk(
JavaScript: Check out client.helpers.*
.NET: Check out BulkAllObservable
PHP: Check out bulk indexing.
+ Ruby: Check out Elasticsearch::Helpers::BulkHelper
Submitting bulk requests with cURL
If you're providing text file input to curl, you must use the --data-binary flag instead of plain -d.
@@ -6010,7 +6011,7 @@ async def termvectors(
doc: t.Optional[t.Mapping[str, t.Any]] = None,
error_trace: t.Optional[bool] = None,
field_statistics: t.Optional[bool] = None,
- fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ fields: t.Optional[t.Sequence[str]] = None,
filter: t.Optional[t.Mapping[str, t.Any]] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
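
A minimal usage sketch for the narrowed `fields` parameter of `termvectors` — it now expects a sequence of field names rather than a comma-separated string. The endpoint URL, index name, document ID, and field names below are hypothetical.

import asyncio
from elasticsearch import AsyncElasticsearch

async def main() -> None:
    client = AsyncElasticsearch("http://localhost:9200")  # hypothetical endpoint
    # `fields` now takes a sequence of field names.
    resp = await client.termvectors(index="my-index", id="1", fields=["title", "body"])
    print(list(resp["term_vectors"]))
    await client.close()

asyncio.run(main())
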
diff --git a/elasticsearch/_async/client/cluster.py b/elasticsearch/_async/client/cluster.py
index 8b393a265..760b9a775 100644
--- a/elasticsearch/_async/client/cluster.py
+++ b/elasticsearch/_async/client/cluster.py
@@ -373,8 +373,13 @@ async def get_settings(
``_
:param flat_settings: If `true`, returns settings in flat format.
- :param include_defaults: If `true`, returns default cluster settings from the
- local node.
+ :param include_defaults: If `true`, also returns default values for all other
+ cluster settings, reflecting the values in the `elasticsearch.yml` file of
+ one of the nodes in the cluster. If the nodes in your cluster do not all
+ have the same values in their `elasticsearch.yml` config files then the values
+ returned by this API may vary from invocation to invocation and may not reflect
+ the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+ API to fetch the settings for each individual node in your cluster.
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
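
A short sketch of the clarified `include_defaults` behavior, assuming a reachable local cluster; the `defaults` section reflects whichever node served the request, so values can vary on heterogeneous clusters.

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # hypothetical endpoint

# persistent/transient are cluster-wide; defaults come from one responding node.
settings = client.cluster.get_settings(include_defaults=True, flat_settings=True)
print(settings["defaults"].get("cluster.name"))

# Per-node settings, as the docstring now recommends for heterogeneous clusters.
node_settings = client.nodes.info(metric="settings")
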
diff --git a/elasticsearch/_async/client/esql.py b/elasticsearch/_async/client/esql.py
index bca1e4255..7b80fc656 100644
--- a/elasticsearch/_async/client/esql.py
+++ b/elasticsearch/_async/client/esql.py
@@ -44,7 +44,7 @@ class EsqlClient(NamespacedClient):
async def async_query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -107,7 +107,12 @@ async def async_query(
which has the name of all the columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, for example `json` or `yaml`.
+ :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response. For async requests, nothing will
+ be returned if the async query doesn't finish within the timeout. The query
+ ID and running status are available in the `X-Elasticsearch-Async-Id` and
+ `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -161,7 +166,7 @@ async def async_query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
@@ -399,7 +404,7 @@ async def async_query_stop(
async def query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -456,7 +461,9 @@ async def query(
`all_columns` which has the name of all columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, e.g. json, yaml.
+ :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -496,7 +503,7 @@ async def query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
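
A sketch of the widened `query` parameter: plain strings keep working, and query objects are serialized with `str()` before being placed in the request body. The commented builder lines assume the `elasticsearch.esql` ES|QL builder (whose classes derive from `ESQLBase`) is available; the endpoint, index, and query are hypothetical.

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # hypothetical endpoint

# A plain ES|QL string, unchanged behavior.
resp = client.esql.query(query='FROM my-index | WHERE status == "error" | LIMIT 10')
print(resp["columns"], resp["values"][:1])

# Assumption: a builder object would be converted via str(query) by this change.
# from elasticsearch.esql import ESQL
# resp = client.esql.query(query=ESQL.from_("my-index").limit(10))
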
diff --git a/elasticsearch/_async/client/inference.py b/elasticsearch/_async/client/inference.py
index 0083677f4..60182931e 100644
--- a/elasticsearch/_async/client/inference.py
+++ b/elasticsearch/_async/client/inference.py
@@ -391,21 +391,23 @@ async def put(
- AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
- Amazon Bedrock (completion, text_embedding)
+ - Amazon SageMaker (chat_completion, completion, rerank, sparse_embedding, text_embedding)
- Anthropic (completion)
- Azure AI Studio (completion, text_embedding)
- Azure OpenAI (completion, text_embedding)
- Cohere (completion, rerank, text_embedding)
- - DeepSeek (completion, chat_completion)
+ - DeepSeek (chat_completion, completion)
- Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
- ELSER (sparse_embedding)
- Google AI Studio (completion, text_embedding)
- - Google Vertex AI (rerank, text_embedding)
+ - Google Vertex AI (chat_completion, completion, rerank, text_embedding)
- Hugging Face (chat_completion, completion, rerank, text_embedding)
+ - JinaAI (rerank, text_embedding)
+ - Llama (chat_completion, completion, text_embedding)
- Mistral (chat_completion, completion, text_embedding)
- OpenAI (chat_completion, completion, text_embedding)
- - VoyageAI (text_embedding, rerank)
+ - VoyageAI (rerank, text_embedding)
- Watsonx inference integration (text_embedding)
- - JinaAI (text_embedding, rerank)
@@ -659,6 +661,112 @@ async def put_amazonbedrock(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ async def put_amazonsagemaker(
+ self,
+ *,
+ task_type: t.Union[
+ str,
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
+ ],
+ amazonsagemaker_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an Amazon SageMaker inference endpoint.
+ Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+
+ ``_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `amazon_sagemaker`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+ you specified.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type and `service_settings.api` you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if amazonsagemaker_inference_id in SKIP_IN_PATH:
+ raise ValueError(
+ "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+ )
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return await self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_amazonsagemaker",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
body_fields=(
"service",
diff --git a/elasticsearch/_async/client/sql.py b/elasticsearch/_async/client/sql.py
index 1763739c5..a744af06c 100644
--- a/elasticsearch/_async/client/sql.py
+++ b/elasticsearch/_async/client/sql.py
@@ -283,7 +283,7 @@ async def query(
keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
keep_on_completion: t.Optional[bool] = None,
page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- params: t.Optional[t.Mapping[str, t.Any]] = None,
+ params: t.Optional[t.Sequence[t.Any]] = None,
pretty: t.Optional[bool] = None,
query: t.Optional[str] = None,
request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
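
With `params` now typed as a sequence, positional `?` placeholders are bound in order; a small sketch (endpoint, index, and columns are hypothetical):

from elasticsearch import Elasticsearch

client = Elasticsearch("http://localhost:9200")  # hypothetical endpoint

resp = client.sql.query(
    query="SELECT author, page_count FROM library WHERE page_count > ? AND author = ?",
    params=[300, "Frank Herbert"],  # bound positionally, left to right
)
print(resp["columns"], resp["rows"])
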
diff --git a/elasticsearch/_sync/client/__init__.py b/elasticsearch/_sync/client/__init__.py
index 69389fcff..5d1fb8f61 100644
--- a/elasticsearch/_sync/client/__init__.py
+++ b/elasticsearch/_sync/client/__init__.py
@@ -698,6 +698,7 @@ def bulk(
JavaScript: Check out client.helpers.*
.NET: Check out BulkAllObservable
PHP: Check out bulk indexing.
+ Ruby: Check out Elasticsearch::Helpers::BulkHelper
Submitting bulk requests with cURL
If you're providing text file input to curl, you must use the --data-binary flag instead of plain -d.
@@ -6008,7 +6009,7 @@ def termvectors(
doc: t.Optional[t.Mapping[str, t.Any]] = None,
error_trace: t.Optional[bool] = None,
field_statistics: t.Optional[bool] = None,
- fields: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ fields: t.Optional[t.Sequence[str]] = None,
filter: t.Optional[t.Mapping[str, t.Any]] = None,
filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
human: t.Optional[bool] = None,
diff --git a/elasticsearch/_sync/client/cluster.py b/elasticsearch/_sync/client/cluster.py
index 77ced5e60..d7322bf5f 100644
--- a/elasticsearch/_sync/client/cluster.py
+++ b/elasticsearch/_sync/client/cluster.py
@@ -373,8 +373,13 @@ def get_settings(
``_
:param flat_settings: If `true`, returns settings in flat format.
- :param include_defaults: If `true`, returns default cluster settings from the
- local node.
+ :param include_defaults: If `true`, also returns default values for all other
+ cluster settings, reflecting the values in the `elasticsearch.yml` file of
+ one of the nodes in the cluster. If the nodes in your cluster do not all
+ have the same values in their `elasticsearch.yml` config files then the values
+ returned by this API may vary from invocation to invocation and may not reflect
+ the values that Elasticsearch uses in all situations. Use the `GET _nodes/settings`
+ API to fetch the settings for each individual node in your cluster.
:param master_timeout: Period to wait for a connection to the master node. If
no response is received before the timeout expires, the request fails and
returns an error.
diff --git a/elasticsearch/_sync/client/esql.py b/elasticsearch/_sync/client/esql.py
index e34a26fb8..67a4746ae 100644
--- a/elasticsearch/_sync/client/esql.py
+++ b/elasticsearch/_sync/client/esql.py
@@ -44,7 +44,7 @@ class EsqlClient(NamespacedClient):
def async_query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -107,7 +107,12 @@ def async_query(
which has the name of all the columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, for example `json` or `yaml`.
+ :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response. For async requests, nothing will
+ be returned if the async query doesn't finish within the timeout. The query
+ ID and running status are available in the `X-Elasticsearch-Async-Id` and
+ `X-Elasticsearch-Async-Is-Running` HTTP headers of the response, respectively.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -161,7 +166,7 @@ def async_query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
@@ -399,7 +404,7 @@ def async_query_stop(
def query(
self,
*,
- query: t.Optional[str] = None,
+ query: t.Optional[t.Union[str, "ESQLBase"]] = None,
allow_partial_results: t.Optional[bool] = None,
columnar: t.Optional[bool] = None,
delimiter: t.Optional[str] = None,
@@ -456,7 +461,9 @@ def query(
`all_columns` which has the name of all columns.
:param filter: Specify a Query DSL query in the filter parameter to filter the
set of documents that an ES|QL query runs on.
- :param format: A short version of the Accept header, e.g. json, yaml.
+ :param format: A short version of the Accept header, e.g. json, yaml. `csv`,
+ `tsv`, and `txt` formats will return results in a tabular format, excluding
+ other metadata fields from the response.
:param include_ccs_metadata: When set to `true` and performing a cross-cluster
query, the response will include an extra `_clusters` object with information
about the clusters that participated in the search along with info such as
@@ -496,7 +503,7 @@ def query(
__query["pretty"] = pretty
if not __body:
if query is not None:
- __body["query"] = query
+ __body["query"] = str(query)
if columnar is not None:
__body["columnar"] = columnar
if filter is not None:
diff --git a/elasticsearch/_sync/client/inference.py b/elasticsearch/_sync/client/inference.py
index 1478883e8..c2668ef09 100644
--- a/elasticsearch/_sync/client/inference.py
+++ b/elasticsearch/_sync/client/inference.py
@@ -391,21 +391,23 @@ def put(
- AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
- Amazon Bedrock (completion, text_embedding)
+ - Amazon SageMaker (chat_completion, completion, rerank, sparse_embedding, text_embedding)
- Anthropic (completion)
- Azure AI Studio (completion, text_embedding)
- Azure OpenAI (completion, text_embedding)
- Cohere (completion, rerank, text_embedding)
- - DeepSeek (completion, chat_completion)
+ - DeepSeek (chat_completion, completion)
- Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
- ELSER (sparse_embedding)
- Google AI Studio (completion, text_embedding)
- - Google Vertex AI (rerank, text_embedding)
+ - Google Vertex AI (chat_completion, completion, rerank, text_embedding)
- Hugging Face (chat_completion, completion, rerank, text_embedding)
+ - JinaAI (rerank, text_embedding)
+ - Llama (chat_completion, completion, text_embedding)
- Mistral (chat_completion, completion, text_embedding)
- OpenAI (chat_completion, completion, text_embedding)
- - VoyageAI (text_embedding, rerank)
+ - VoyageAI (rerank, text_embedding)
- Watsonx inference integration (text_embedding)
- - JinaAI (text_embedding, rerank)
@@ -659,6 +661,112 @@ def put_amazonbedrock(
path_parts=__path_parts,
)
+ @_rewrite_parameters(
+ body_fields=(
+ "service",
+ "service_settings",
+ "chunking_settings",
+ "task_settings",
+ ),
+ )
+ def put_amazonsagemaker(
+ self,
+ *,
+ task_type: t.Union[
+ str,
+ t.Literal[
+ "chat_completion",
+ "completion",
+ "rerank",
+ "sparse_embedding",
+ "text_embedding",
+ ],
+ ],
+ amazonsagemaker_inference_id: str,
+ service: t.Optional[t.Union[str, t.Literal["amazon_sagemaker"]]] = None,
+ service_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ chunking_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ error_trace: t.Optional[bool] = None,
+ filter_path: t.Optional[t.Union[str, t.Sequence[str]]] = None,
+ human: t.Optional[bool] = None,
+ pretty: t.Optional[bool] = None,
+ task_settings: t.Optional[t.Mapping[str, t.Any]] = None,
+ timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
+ body: t.Optional[t.Dict[str, t.Any]] = None,
+ ) -> ObjectApiResponse[t.Any]:
+ """
+ .. raw:: html
+
+ Create an Amazon SageMaker inference endpoint.
+ Create an inference endpoint to perform an inference task with the amazon_sagemaker service.
+
+
+ ``_
+
+ :param task_type: The type of the inference task that the model will perform.
+ :param amazonsagemaker_inference_id: The unique identifier of the inference endpoint.
+ :param service: The type of service supported for the specified task type. In
+ this case, `amazon_sagemaker`.
+ :param service_settings: Settings used to install the inference model. These
+ settings are specific to the `amazon_sagemaker` service and `service_settings.api`
+ you specified.
+ :param chunking_settings: The chunking configuration object.
+ :param task_settings: Settings to configure the inference task. These settings
+ are specific to the task type and `service_settings.api` you specified.
+ :param timeout: Specifies the amount of time to wait for the inference endpoint
+ to be created.
+ """
+ if task_type in SKIP_IN_PATH:
+ raise ValueError("Empty value passed for parameter 'task_type'")
+ if amazonsagemaker_inference_id in SKIP_IN_PATH:
+ raise ValueError(
+ "Empty value passed for parameter 'amazonsagemaker_inference_id'"
+ )
+ if service is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service'")
+ if service_settings is None and body is None:
+ raise ValueError("Empty value passed for parameter 'service_settings'")
+ __path_parts: t.Dict[str, str] = {
+ "task_type": _quote(task_type),
+ "amazonsagemaker_inference_id": _quote(amazonsagemaker_inference_id),
+ }
+ __path = f'/_inference/{__path_parts["task_type"]}/{__path_parts["amazonsagemaker_inference_id"]}'
+ __query: t.Dict[str, t.Any] = {}
+ __body: t.Dict[str, t.Any] = body if body is not None else {}
+ if error_trace is not None:
+ __query["error_trace"] = error_trace
+ if filter_path is not None:
+ __query["filter_path"] = filter_path
+ if human is not None:
+ __query["human"] = human
+ if pretty is not None:
+ __query["pretty"] = pretty
+ if timeout is not None:
+ __query["timeout"] = timeout
+ if not __body:
+ if service is not None:
+ __body["service"] = service
+ if service_settings is not None:
+ __body["service_settings"] = service_settings
+ if chunking_settings is not None:
+ __body["chunking_settings"] = chunking_settings
+ if task_settings is not None:
+ __body["task_settings"] = task_settings
+ if not __body:
+ __body = None # type: ignore[assignment]
+ __headers = {"accept": "application/json"}
+ if __body is not None:
+ __headers["content-type"] = "application/json"
+ return self.perform_request( # type: ignore[return-value]
+ "PUT",
+ __path,
+ params=__query,
+ headers=__headers,
+ body=__body,
+ endpoint_id="inference.put_amazonsagemaker",
+ path_parts=__path_parts,
+ )
+
@_rewrite_parameters(
body_fields=(
"service",
diff --git a/elasticsearch/_sync/client/sql.py b/elasticsearch/_sync/client/sql.py
index cde458be5..094a1b016 100644
--- a/elasticsearch/_sync/client/sql.py
+++ b/elasticsearch/_sync/client/sql.py
@@ -283,7 +283,7 @@ def query(
keep_alive: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
keep_on_completion: t.Optional[bool] = None,
page_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
- params: t.Optional[t.Mapping[str, t.Any]] = None,
+ params: t.Optional[t.Sequence[t.Any]] = None,
pretty: t.Optional[bool] = None,
query: t.Optional[str] = None,
request_timeout: t.Optional[t.Union[str, t.Literal[-1], t.Literal[0]]] = None,
diff --git a/elasticsearch/dsl/aggs.py b/elasticsearch/dsl/aggs.py
index 3b7a8e8ba..7f1019b56 100644
--- a/elasticsearch/dsl/aggs.py
+++ b/elasticsearch/dsl/aggs.py
@@ -372,6 +372,12 @@ class Boxplot(Agg[_R]):
:arg compression: Limits the maximum number of nodes used by the
underlying TDigest algorithm to `20 * compression`, enabling
control of memory usage and approximation error.
+ :arg execution_hint: The default implementation of TDigest is
+ optimized for performance, scaling to millions or even billions of
+ sample values while maintaining acceptable accuracy levels (close
+ to 1% relative error for millions of samples in some cases). To
+ use an implementation optimized for accuracy, set this parameter
+ to high_accuracy instead. Defaults to `default` if omitted.
:arg field: The field on which to run the aggregation.
:arg missing: The value to apply to documents that do not have a
value. By default, documents without a value are ignored.
@@ -384,6 +390,9 @@ def __init__(
self,
*,
compression: Union[float, "DefaultType"] = DEFAULT,
+ execution_hint: Union[
+ Literal["default", "high_accuracy"], "DefaultType"
+ ] = DEFAULT,
field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
script: Union["types.Script", Dict[str, Any], "DefaultType"] = DEFAULT,
@@ -391,6 +400,7 @@ def __init__(
):
super().__init__(
compression=compression,
+ execution_hint=execution_hint,
field=field,
missing=missing,
script=script,
@@ -1897,6 +1907,12 @@ class MedianAbsoluteDeviation(Agg[_R]):
underlying TDigest algorithm to `20 * compression`, enabling
control of memory usage and approximation error. Defaults to
`1000` if omitted.
+ :arg execution_hint: The default implementation of TDigest is
+ optimized for performance, scaling to millions or even billions of
+ sample values while maintaining acceptable accuracy levels (close
+ to 1% relative error for millions of samples in some cases). To
+ use an implementation optimized for accuracy, set this parameter
+ to high_accuracy instead. Defaults to `default` if omitted.
:arg format:
:arg field: The field on which to run the aggregation.
:arg missing: The value to apply to documents that do not have a
@@ -1910,6 +1926,9 @@ def __init__(
self,
*,
compression: Union[float, "DefaultType"] = DEFAULT,
+ execution_hint: Union[
+ Literal["default", "high_accuracy"], "DefaultType"
+ ] = DEFAULT,
format: Union[str, "DefaultType"] = DEFAULT,
field: Union[str, "InstrumentedField", "DefaultType"] = DEFAULT,
missing: Union[str, int, float, bool, "DefaultType"] = DEFAULT,
@@ -1918,6 +1937,7 @@ def __init__(
):
super().__init__(
compression=compression,
+ execution_hint=execution_hint,
format=format,
field=field,
missing=missing,
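
The new `execution_hint` flows straight into the aggregation body; a request-building sketch with a hypothetical index and field (no cluster required):

from elasticsearch.dsl import Search

s = Search(index="latency-metrics")  # hypothetical index
s.aggs.metric(
    "load_time_boxplot",
    "boxplot",
    field="load_time",
    execution_hint="high_accuracy",  # trades speed for TDigest accuracy
)
print(s.to_dict()["aggs"])
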
diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py
index 1aa7a4bca..c33261458 100644
--- a/elasticsearch/dsl/field.py
+++ b/elasticsearch/dsl/field.py
@@ -3849,6 +3849,14 @@ class SemanticText(Field):
by using the Update mapping API. Use the Create inference API to
create the endpoint. If not specified, the inference endpoint
defined by inference_id will be used at both index and query time.
+ :arg index_options: Settings for index_options that override any
+ defaults used by semantic_text, for example specific quantization
+ settings.
+ :arg chunking_settings: Settings for chunking text into smaller
+ passages. If specified, these will override the chunking settings
+ sent in the inference endpoint associated with inference_id. If
+ chunking settings are updated, they will not be applied to
+ existing documents until they are reindexed.
"""
name = "semantic_text"
@@ -3859,6 +3867,12 @@ def __init__(
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
inference_id: Union[str, "DefaultType"] = DEFAULT,
search_inference_id: Union[str, "DefaultType"] = DEFAULT,
+ index_options: Union[
+ "types.SemanticTextIndexOptions", Dict[str, Any], "DefaultType"
+ ] = DEFAULT,
+ chunking_settings: Union[
+ "types.ChunkingSettings", Dict[str, Any], "DefaultType"
+ ] = DEFAULT,
**kwargs: Any,
):
if meta is not DEFAULT:
@@ -3867,6 +3881,10 @@ def __init__(
kwargs["inference_id"] = inference_id
if search_inference_id is not DEFAULT:
kwargs["search_inference_id"] = search_inference_id
+ if index_options is not DEFAULT:
+ kwargs["index_options"] = index_options
+ if chunking_settings is not DEFAULT:
+ kwargs["chunking_settings"] = chunking_settings
super().__init__(*args, **kwargs)
@@ -4063,6 +4081,9 @@ def __init__(
class SparseVector(Field):
"""
:arg store:
+ :arg index_options: Additional index options for the sparse vector
+ field that controls the token pruning behavior of the sparse
+ vector field.
:arg meta: Metadata about the field.
:arg properties:
:arg ignore_above:
@@ -4081,6 +4102,9 @@ def __init__(
self,
*args: Any,
store: Union[bool, "DefaultType"] = DEFAULT,
+ index_options: Union[
+ "types.SparseVectorIndexOptions", Dict[str, Any], "DefaultType"
+ ] = DEFAULT,
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
properties: Union[Mapping[str, Field], "DefaultType"] = DEFAULT,
ignore_above: Union[int, "DefaultType"] = DEFAULT,
@@ -4095,6 +4119,8 @@ def __init__(
):
if store is not DEFAULT:
kwargs["store"] = store
+ if index_options is not DEFAULT:
+ kwargs["index_options"] = index_options
if meta is not DEFAULT:
kwargs["meta"] = meta
if properties is not DEFAULT:
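
A mapping-only sketch of the new field options: `chunking_settings` uses the keys from the `ChunkingSettings` type added below, while the `SparseVector` `index_options` simply enables pruning. The document class, field names, and inference endpoint ID are hypothetical.

from elasticsearch.dsl import Document
from elasticsearch.dsl.field import SemanticText, SparseVector

class Article(Document):  # hypothetical mapping
    summary = SemanticText(
        inference_id="my-elser-endpoint",  # hypothetical inference endpoint
        chunking_settings={"strategy": "sentence", "max_chunk_size": 250, "sentence_overlap": 1},
    )
    tokens = SparseVector(index_options={"prune": True})

print(Article._doc_type.mapping.to_dict())
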
diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py
index 2e616f644..b65dd1210 100644
--- a/elasticsearch/dsl/types.py
+++ b/elasticsearch/dsl/types.py
@@ -170,6 +170,48 @@ def __init__(
super().__init__(kwargs)
+class ChunkingSettings(AttrDict[Any]):
+ """
+ :arg strategy: (required) The chunking strategy: `sentence` or `word`.
+ Defaults to `sentence` if omitted.
+ :arg max_chunk_size: (required) The maximum size of a chunk in words.
+ This value cannot be higher than `300` or lower than `20` (for
+ `sentence` strategy) or `10` (for `word` strategy). Defaults to
+ `250` if omitted.
+ :arg overlap: The number of overlapping words for chunks. It is
+ applicable only to a `word` chunking strategy. This value cannot
+ be higher than half the `max_chunk_size` value. Defaults to `100`
+ if omitted.
+ :arg sentence_overlap: The number of overlapping sentences for chunks.
+ It is applicable only for a `sentence` chunking strategy. It can
+ be either `1` or `0`. Defaults to `1` if omitted.
+ """
+
+ strategy: Union[str, DefaultType]
+ max_chunk_size: Union[int, DefaultType]
+ overlap: Union[int, DefaultType]
+ sentence_overlap: Union[int, DefaultType]
+
+ def __init__(
+ self,
+ *,
+ strategy: Union[str, DefaultType] = DEFAULT,
+ max_chunk_size: Union[int, DefaultType] = DEFAULT,
+ overlap: Union[int, DefaultType] = DEFAULT,
+ sentence_overlap: Union[int, DefaultType] = DEFAULT,
+ **kwargs: Any,
+ ):
+ if strategy is not DEFAULT:
+ kwargs["strategy"] = strategy
+ if max_chunk_size is not DEFAULT:
+ kwargs["max_chunk_size"] = max_chunk_size
+ if overlap is not DEFAULT:
+ kwargs["overlap"] = overlap
+ if sentence_overlap is not DEFAULT:
+ kwargs["sentence_overlap"] = sentence_overlap
+ super().__init__(kwargs)
+
+
class ClassificationInferenceOptions(AttrDict[Any]):
"""
:arg num_top_classes: Specifies the number of top class predictions to
@@ -3119,6 +3161,26 @@ def __init__(
super().__init__(kwargs)
+class SemanticTextIndexOptions(AttrDict[Any]):
+ """
+ :arg dense_vector:
+ """
+
+ dense_vector: Union["DenseVectorIndexOptions", Dict[str, Any], DefaultType]
+
+ def __init__(
+ self,
+ *,
+ dense_vector: Union[
+ "DenseVectorIndexOptions", Dict[str, Any], DefaultType
+ ] = DEFAULT,
+ **kwargs: Any,
+ ):
+ if dense_vector is not DEFAULT:
+ kwargs["dense_vector"] = dense_vector
+ super().__init__(kwargs)
+
+
class ShapeFieldQuery(AttrDict[Any]):
"""
:arg indexed_shape: Queries using a pre-indexed shape.
@@ -3196,10 +3258,15 @@ def __init__(
class SourceFilter(AttrDict[Any]):
"""
- :arg excludes:
- :arg includes:
+ :arg exclude_vectors: If `true`, vector fields are excluded from the
+ returned source. This option takes precedence over `includes`:
+ any vector field will remain excluded even if it matches an
+ `includes` rule.
+ :arg excludes: A list of fields to exclude from the returned source.
+ :arg includes: A list of fields to include in the returned source.
"""
+ exclude_vectors: Union[bool, DefaultType]
excludes: Union[
Union[str, InstrumentedField],
Sequence[Union[str, InstrumentedField]],
@@ -3214,6 +3281,7 @@ class SourceFilter(AttrDict[Any]):
def __init__(
self,
*,
+ exclude_vectors: Union[bool, DefaultType] = DEFAULT,
excludes: Union[
Union[str, InstrumentedField],
Sequence[Union[str, InstrumentedField]],
@@ -3226,6 +3294,8 @@ def __init__(
] = DEFAULT,
**kwargs: Any,
):
+ if exclude_vectors is not DEFAULT:
+ kwargs["exclude_vectors"] = exclude_vectors
if excludes is not DEFAULT:
kwargs["excludes"] = str(excludes)
if includes is not DEFAULT:
@@ -3675,6 +3745,38 @@ def __init__(
super().__init__(kwargs)
+class SparseVectorIndexOptions(AttrDict[Any]):
+ """
+ :arg prune: Whether to perform pruning, omitting the non-significant
+ tokens from the query to improve query performance. If prune is
+ true but the pruning_config is not specified, pruning will occur
+ but default values will be used. Default: false
+ :arg pruning_config: Optional pruning configuration. If enabled, this
+ will omit non-significant tokens from the query in order to
+ improve query performance. This is only used if prune is set to
+ true. If prune is set to true but pruning_config is not specified,
+ default values will be used.
+ """
+
+ prune: Union[bool, DefaultType]
+ pruning_config: Union["TokenPruningConfig", Dict[str, Any], DefaultType]
+
+ def __init__(
+ self,
+ *,
+ prune: Union[bool, DefaultType] = DEFAULT,
+ pruning_config: Union[
+ "TokenPruningConfig", Dict[str, Any], DefaultType
+ ] = DEFAULT,
+ **kwargs: Any,
+ ):
+ if prune is not DEFAULT:
+ kwargs["prune"] = prune
+ if pruning_config is not DEFAULT:
+ kwargs["pruning_config"] = pruning_config
+ super().__init__(kwargs)
+
+
class SuggestContext(AttrDict[Any]):
"""
:arg name: (required)
@@ -3713,15 +3815,30 @@ class TDigest(AttrDict[Any]):
:arg compression: Limits the maximum number of nodes used by the
underlying TDigest algorithm to `20 * compression`, enabling
control of memory usage and approximation error.
+ :arg execution_hint: The default implementation of TDigest is
+ optimized for performance, scaling to millions or even billions of
+ sample values while maintaining acceptable accuracy levels (close
+ to 1% relative error for millions of samples in some cases). To
+ use an implementation optimized for accuracy, set this parameter
+ to high_accuracy instead. Defaults to `default` if omitted.
"""
compression: Union[int, DefaultType]
+ execution_hint: Union[Literal["default", "high_accuracy"], DefaultType]
def __init__(
- self, *, compression: Union[int, DefaultType] = DEFAULT, **kwargs: Any
+ self,
+ *,
+ compression: Union[int, DefaultType] = DEFAULT,
+ execution_hint: Union[
+ Literal["default", "high_accuracy"], DefaultType
+ ] = DEFAULT,
+ **kwargs: Any,
):
if compression is not DEFAULT:
kwargs["compression"] = compression
+ if execution_hint is not DEFAULT:
+ kwargs["execution_hint"] = execution_hint
super().__init__(kwargs)
@@ -4444,7 +4561,7 @@ class ArrayPercentilesItem(AttrDict[Any]):
:arg value_as_string:
"""
- key: str
+ key: float
value: Union[float, None]
value_as_string: str
@@ -5290,7 +5407,9 @@ class HdrPercentileRanksAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -5300,7 +5419,9 @@ class HdrPercentilesAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -5809,7 +5930,9 @@ class PercentilesBucketAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -6344,7 +6467,9 @@ class TDigestPercentileRanksAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
@@ -6354,7 +6479,9 @@ class TDigestPercentilesAggregate(AttrDict[Any]):
:arg meta:
"""
- values: Union[Mapping[str, Union[str, int, None]], Sequence["ArrayPercentilesItem"]]
+ values: Union[
+ Mapping[str, Union[str, float, None]], Sequence["ArrayPercentilesItem"]
+ ]
meta: Mapping[str, Any]
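
Finally, a sketch of the new `exclude_vectors` source filter from the DSL side; `Search.source()` forwards extra keyword arguments into the `_source` object, and the option itself requires a server version that supports it (index and patterns are hypothetical).

from elasticsearch.dsl import Search

s = Search(index="semantic-articles")  # hypothetical index
s = s.source(exclude_vectors=True, excludes=["debug_*"])
print(s.to_dict()["_source"])
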