Skip to content
Open
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
f52f5e2
Add Mixedbread AI Rerank support
Evgenii-Kazannik Jan 7, 2026
0a184ca
Add Mixedbread AI Rerank support tests
Evgenii-Kazannik Jan 12, 2026
dc1f701
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 13, 2026
6133d64
Apply spotless
Evgenii-Kazannik Jan 13, 2026
6b63ffb
Add Mixedbread AI Rerank support
Evgenii-Kazannik Jan 14, 2026
2cd922c
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 14, 2026
9ec0f7d
Add action creator tests
Evgenii-Kazannik Jan 20, 2026
3fd0d23
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 20, 2026
4cb12db
Make windows size configurable
Evgenii-Kazannik Jan 22, 2026
18d5ce4
Address comments and add service tests
Evgenii-Kazannik Jan 22, 2026
fd98ef0
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 27, 2026
5bbffe1
[CI] Update transport version definitions
Jan 27, 2026
83a6497
Switch to new approach for transport version
Evgenii-Kazannik Jan 28, 2026
fbd6d5d
Use ConstructingObjectParser
Evgenii-Kazannik Jan 28, 2026
f4c5c0d
Address comments
Evgenii-Kazannik Jan 28, 2026
c72e6a2
Merge remote-tracking branch 'origin/Add-Mixedbread-AI-Rerank-support…
Evgenii-Kazannik Jan 28, 2026
faa5ce8
[CI] Auto commit changes from spotless
Jan 28, 2026
36f7994
Fix the test
Evgenii-Kazannik Jan 28, 2026
87f09e5
Checkstyle fix
Evgenii-Kazannik Jan 28, 2026
fa208ee
Fix the test
Evgenii-Kazannik Jan 28, 2026
5ed8e6a
Clean up
Evgenii-Kazannik Jan 29, 2026
6c3bc8f
Clean up
Evgenii-Kazannik Jan 29, 2026
cd94963
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 29, 2026
81f9aca
[CI] Update transport version definitions
Jan 29, 2026
e447752
ci: retrigger
Evgenii-Kazannik Jan 29, 2026
59d97d8
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Jan 30, 2026
68108cf
[CI] Update transport version definitions
Jan 30, 2026
2ab34e0
Address comments and refactor
Evgenii-Kazannik Feb 1, 2026
65f2b64
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Feb 2, 2026
a20144f
[CI] Update transport version definitions
Feb 2, 2026
9b1a82c
Address comments
Evgenii-Kazannik Feb 2, 2026
84aab0e
[CI] Update transport version definitions
Feb 3, 2026
035c7a3
Merge branch 'main' into Add-Mixedbread-AI-Rerank-support
Evgenii-Kazannik Feb 3, 2026
64956c1
Adjust to a new model creation approach
Evgenii-Kazannik Feb 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/140477.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 140477
summary: "[Inference API] Add Mixedbread Rerank support to the Inference Plugin"
area: Machine Learning
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The area should be "Inference" rather than "Machine Learning"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced: [ML] -> [Inference API]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's also a good change, but I was referring to the area field, which should be Inference, not Machine Learning.

type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
9271000
2 changes: 1 addition & 1 deletion server/src/main/resources/transport/upper_bounds/9.4.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
bulk_by_scroll_request_includes_relocation_field,9270000
ml_inference_mixedbread_added,9271000
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ public void testGetServicesWithoutTaskType() throws IOException {
"text_embedding_test_service",
"voyageai",
"watsonxai",
"amazon_sagemaker"
"amazon_sagemaker",
"mixedbread"
).toArray()
)
);
Expand Down Expand Up @@ -145,6 +146,7 @@ public void testGetServicesWithRerankTaskType() throws IOException {
"elasticsearch",
"googlevertexai",
"jinaai",
"mixedbread",
"nvidia",
"openshift_ai",
"test_reranking_service",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@
import org.elasticsearch.xpack.inference.services.llama.embeddings.LlamaEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.mistral.completion.MistralChatCompletionServiceSettings;
import org.elasticsearch.xpack.inference.services.mistral.embeddings.MistralEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankServiceSettings;
import org.elasticsearch.xpack.inference.services.mixedbread.rerank.MixedbreadRerankTaskSettings;
import org.elasticsearch.xpack.inference.services.nvidia.completion.NvidiaChatCompletionServiceSettings;
import org.elasticsearch.xpack.inference.services.nvidia.embeddings.NvidiaEmbeddingsServiceSettings;
import org.elasticsearch.xpack.inference.services.nvidia.embeddings.NvidiaEmbeddingsTaskSettings;
Expand Down Expand Up @@ -187,6 +189,7 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
addAi21NamedWriteables(namedWriteables);
addOpenShiftAiNamedWriteables(namedWriteables);
addNvidiaNamedWriteables(namedWriteables);
addMixedbreadNamedWriteables(namedWriteables);

addUnifiedNamedWriteables(namedWriteables);

Expand Down Expand Up @@ -942,4 +945,17 @@ private static void addElasticNamedWriteables(List<NamedWriteableRegistry.Entry>
)
);
}

/**
 * Adds the Mixedbread rerank named-writeable entries to the given list.
 * Two entries are appended, in this order: the rerank service settings
 * (category {@link ServiceSettings}) and the rerank task settings
 * (category {@link TaskSettings}).
 *
 * @param namedWriteables the list the entries are appended to
 */
private static void addMixedbreadNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) {
    // Service settings entry, keyed by MixedbreadRerankServiceSettings.NAME.
    NamedWriteableRegistry.Entry rerankServiceSettings = new NamedWriteableRegistry.Entry(
        ServiceSettings.class,
        MixedbreadRerankServiceSettings.NAME,
        MixedbreadRerankServiceSettings::new
    );
    namedWriteables.add(rerankServiceSettings);

    // Task settings entry, keyed by MixedbreadRerankTaskSettings.NAME.
    NamedWriteableRegistry.Entry rerankTaskSettings = new NamedWriteableRegistry.Entry(
        TaskSettings.class,
        MixedbreadRerankTaskSettings.NAME,
        MixedbreadRerankTaskSettings::new
    );
    namedWriteables.add(rerankTaskSettings);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@
import org.elasticsearch.xpack.inference.services.jinaai.JinaAIService;
import org.elasticsearch.xpack.inference.services.llama.LlamaService;
import org.elasticsearch.xpack.inference.services.mistral.MistralService;
import org.elasticsearch.xpack.inference.services.mixedbread.MixedbreadService;
import org.elasticsearch.xpack.inference.services.nvidia.NvidiaService;
import org.elasticsearch.xpack.inference.services.openai.OpenAiService;
import org.elasticsearch.xpack.inference.services.openshiftai.OpenShiftAiService;
Expand Down Expand Up @@ -561,6 +562,7 @@ public List<InferenceServiceExtension.Factory> getInferenceServiceFactories() {
context -> new GoogleAiStudioService(httpFactory.get(), serviceComponents.get(), context),
context -> new GoogleVertexAiService(httpFactory.get(), serviceComponents.get(), context),
context -> new MistralService(httpFactory.get(), serviceComponents.get(), context),
context -> new MixedbreadService(httpFactory.get(), serviceComponents.get(), context),
context -> new AnthropicService(httpFactory.get(), serviceComponents.get(), context),
context -> new AmazonBedrockService(httpFactory.get(), amazonBedrockFactory.get(), serviceComponents.get(), context),
context -> new AlibabaCloudSearchService(httpFactory.get(), serviceComponents.get(), context),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,8 @@ public static RestStatus toRestStatus(int statusCode) {

return code == null ? RestStatus.BAD_REQUEST : code;
}

/**
 * Builds the error message used when the requested resource could not be found.
 *
 * @param request the request whose URI is included in the message
 * @return a message of the form {@code Resource not found at [<uri>]}
 */
protected static String resourceNotFoundError(Request request) {
    var requestUri = request.getURI();
    return format("Resource not found at [%s]", requestUri);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
import java.io.IOException;
import java.util.concurrent.Flow;

import static org.elasticsearch.core.Strings.format;

public class GoogleAiStudioResponseHandler extends BaseResponseHandler {

static final String GOOGLE_AI_STUDIO_UNAVAILABLE = "The Google AI Studio service may be temporarily overloaded or down";
Expand Down Expand Up @@ -82,10 +80,6 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr
}
}

private static String resourceNotFoundError(Request request) {
return format("Resource not found at [%s]", request.getURI());
}

@Override
public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpResult> flow) {
var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser());
Expand All @@ -94,5 +88,4 @@ public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpR
serverSentEventProcessor.subscribe(googleAiProcessor);
return new StreamingChatCompletionResults(googleAiProcessor);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
import java.util.concurrent.Flow;
import java.util.function.Function;

import static org.elasticsearch.core.Strings.format;

public class GoogleVertexAiResponseHandler extends BaseResponseHandler {

static final String GOOGLE_VERTEX_AI_UNAVAILABLE = "The Google Vertex AI service may be temporarily overloaded or down";
Expand Down Expand Up @@ -68,10 +66,6 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr
}
}

private static String resourceNotFoundError(Request request) {
return format("Resource not found at [%s]", request.getURI());
}

@Override
public InferenceServiceResults parseResult(Request request, Flow.Publisher<HttpResult> flow) {
var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
import org.elasticsearch.xpack.inference.external.request.Request;
import org.elasticsearch.xpack.inference.services.ibmwatsonx.response.IbmWatsonxErrorResponseEntity;

import static org.elasticsearch.core.Strings.format;

public class IbmWatsonxResponseHandler extends BaseResponseHandler {
public IbmWatsonxResponseHandler(String requestType, ResponseParser parseFunction) {
super(requestType, parseFunction, IbmWatsonxErrorResponseEntity::fromResponse);
Expand Down Expand Up @@ -53,8 +51,4 @@ protected void checkForFailureStatusCode(Request request, HttpResult result) thr
throw new RetryException(false, buildError(UNSUCCESSFUL, request, result));
}
}

private static String resourceNotFoundError(Request request) {
return format("Resource not found at [%s]", request.getURI());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.services.mixedbread;

import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.inference.ModelConfigurations;
import org.elasticsearch.inference.ModelSecrets;
import org.elasticsearch.inference.TaskSettings;
import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel;
import org.elasticsearch.xpack.inference.services.ServiceUtils;
import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor;
import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets;
import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;

import java.net.URI;
import java.util.Map;
import java.util.Objects;

/**
* Abstract class representing a Mixedbread model for inference.
* This class extends RateLimitGroupingModel and provides common functionality for Mixedbread models.
*/
public abstract class MixedbreadModel extends RateLimitGroupingModel {
// API key; assigned exactly once by each constructor of this class.
private final SecureString apiKey;
// Rate limit settings; the primary constructor rejects null for this value.
private final RateLimitSettings rateLimitServiceSettings;
// Target URI. Kept private and final: every constructor assigns it exactly once,
// and subclasses and callers read it through uri().
private final URI uri;

/**
 * Creates a Mixedbread model.
 * The configurations and secrets are handed to the superclass constructor; the API key is
 * obtained from {@code apiKeySecrets} via {@link ServiceUtils#apiKey}, and the URI is stored as given
 * (no null check is performed on it here).
 *
 * @param configurations the model configurations passed to the superclass
 * @param secrets the model secrets passed to the superclass
 * @param apiKeySecrets the secrets the API key is read from; may be {@code null}
 * @param rateLimitServiceSettings the rate limit settings; must not be {@code null}
 * @param uri the URI stored on this model
 * @throws NullPointerException if {@code rateLimitServiceSettings} is {@code null}
 */
public MixedbreadModel(
ModelConfigurations configurations,
ModelSecrets secrets,
@Nullable ApiKeySecrets apiKeySecrets,
RateLimitSettings rateLimitServiceSettings,
URI uri
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The uri field should be made private final instead of protected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

) {
super(configurations, secrets);

this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings);
apiKey = ServiceUtils.apiKey(apiKeySecrets);
this.uri = uri;
}

/**
 * Creates a model from an existing one, handing the given task settings to the superclass.
 * The rate limit settings, API key and URI are taken from {@code model} through its accessors.
 *
 * @param model the model to copy the rate limit settings, API key and URI from
 * @param taskSettings the task settings passed to the superclass constructor
 */
protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) {
    super(model, taskSettings);

    this.rateLimitServiceSettings = model.rateLimitServiceSettings();
    this.apiKey = model.apiKey();
    this.uri = model.uri();
}

/**
 * Returns the API key held by this model.
 *
 * @return the API key assigned at construction
 */
public SecureString apiKey() {
    return this.apiKey;
}

/**
 * Returns the rate limit settings held by this model.
 *
 * @return the rate limit settings assigned at construction
 */
public RateLimitSettings rateLimitServiceSettings() {
    return this.rateLimitServiceSettings;
}

/**
 * Produces the {@link ExecutableAction} for this model using the supplied visitor.
 * Concrete models decide which visitor method to dispatch to.
 *
 * @param creator the visitor used to build the action
 * @param taskSettings task settings handed to the visitor
 *                     (presumably request-level overrides; confirm against implementations)
 * @return the action created for this model
 */
public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map<String, Object> taskSettings);

/**
 * Returns the URI held by this model.
 *
 * @return the URI assigned at construction
 */
public URI uri() {
    return this.uri;
}

/**
 * Returns the rate limit settings of this model.
 * This is the same value that {@code rateLimitServiceSettings()} exposes.
 *
 * @return the rate limit settings assigned at construction
 */
@Override
public RateLimitSettings rateLimitSettings() {
    return rateLimitServiceSettings;
}

/**
 * Returns the hash used to group this model for rate limiting.
 * The hash is derived from the API key only, so two models with equal API keys
 * produce the same value regardless of their other settings.
 *
 * @return the hash code of {@code apiKey()}
 */
@Override
public int rateLimitGroupingHash() {
    return apiKey().hashCode();
}
}
Loading