-
Notifications
You must be signed in to change notification settings - Fork 25.8k
Add Mixedbread AI Rerank support #140477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Add Mixedbread AI Rerank support #140477
Changes from 27 commits
f52f5e2
0a184ca
dc1f701
6133d64
6b63ffb
2cd922c
9ec0f7d
3fd0d23
4cb12db
18d5ce4
fd98ef0
5bbffe1
83a6497
fbd6d5d
f4c5c0d
c72e6a2
faa5ce8
36f7994
87f09e5
fa208ee
5ed8e6a
6c3bc8f
cd94963
81f9aca
e447752
59d97d8
68108cf
2ab34e0
65f2b64
a20144f
9b1a82c
84aab0e
035c7a3
64956c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pr: 140477 | ||
| summary: "[ML] Add Mixedbread Rerank support to the Inference Plugin" | ||
| area: Machine Learning | ||
| type: enhancement | ||
| issues: [] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| 9270000 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| esql_view_queries,9269000 | ||
| ml_inference_mixedbread_added,9270000 |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,33 @@ | ||||||
| /* | ||||||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||||||
| * or more contributor license agreements. Licensed under the Elastic License | ||||||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||||||
| * 2.0. | ||||||
| */ | ||||||
|
|
||||||
| package org.elasticsearch.xpack.inference.services.mixedbread; | ||||||
|
|
||||||
| import org.apache.http.client.utils.URIBuilder; | ||||||
|
|
||||||
| public class MixedbreadConstants { | ||||||
| public static final String HOST = "api.mixedbread.com"; | ||||||
| public static final String VERSION_1 = "v1"; | ||||||
| public static final String RERANK_PATH = "rerank"; | ||||||
|
||||||
| public static final String RERANK_PATH = "rerank"; | |
| public static final String RERANK_PATH = "reranking"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mixedbread supports both. I left "rerank". Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While both may work, the documentation for Mixedbread reranking uses `reranking` as the endpoint path, so that's what we should be using.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| package org.elasticsearch.xpack.inference.services.mixedbread; | ||
|
|
||
| import org.elasticsearch.common.settings.SecureString; | ||
| import org.elasticsearch.core.Nullable; | ||
| import org.elasticsearch.inference.ModelConfigurations; | ||
| import org.elasticsearch.inference.ModelSecrets; | ||
| import org.elasticsearch.inference.ServiceSettings; | ||
| import org.elasticsearch.inference.TaskSettings; | ||
| import org.elasticsearch.xpack.inference.external.action.ExecutableAction; | ||
| import org.elasticsearch.xpack.inference.services.RateLimitGroupingModel; | ||
| import org.elasticsearch.xpack.inference.services.ServiceUtils; | ||
| import org.elasticsearch.xpack.inference.services.mixedbread.action.MixedbreadActionVisitor; | ||
| import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets; | ||
| import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; | ||
|
|
||
| import java.net.URI; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
|
|
||
| /** | ||
| * Base class for Mixedbread inference models. | ||
| * <p> | ||
| * Extends {@link RateLimitGroupingModel} and keeps the state shared by every Mixedbread model: | ||
| * the API key, the rate limit settings and the endpoint {@link URI}. | ||
| */ | ||
| public abstract class MixedbreadModel extends RateLimitGroupingModel { | ||
| private final SecureString apiKey; | ||
| private final RateLimitSettings rateLimitServiceSettings; | ||
| // Not assigned by the configurations-based constructor; presumably set by subclasses - confirm. | ||
| protected URI uri; | ||
|
|
||
| /** | ||
| * Creates a model from its configurations and secrets. | ||
| * | ||
| * @param configurations the model configurations, forwarded to the superclass | ||
| * @param secrets the model secrets, forwarded to the superclass | ||
| * @param apiKeySecrets the secrets the API key is read from, may be {@code null} | ||
| * @param rateLimitServiceSettings the rate limit settings, must not be {@code null} | ||
| * @throws NullPointerException if {@code rateLimitServiceSettings} is {@code null} | ||
| */ | ||
| public MixedbreadModel( | ||
| ModelConfigurations configurations, | ||
| ModelSecrets secrets, | ||
| @Nullable ApiKeySecrets apiKeySecrets, | ||
| RateLimitSettings rateLimitServiceSettings | ||
| ) { | ||
| super(configurations, secrets); | ||
|
|
||
| this.rateLimitServiceSettings = Objects.requireNonNull(rateLimitServiceSettings); | ||
| apiKey = ServiceUtils.apiKey(apiKeySecrets); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a copy of {@code model} that uses the given task settings. | ||
| * | ||
| * @param model the model to copy the API key, rate limit settings and URI from | ||
| * @param taskSettings the task settings of the new model | ||
| */ | ||
| protected MixedbreadModel(MixedbreadModel model, TaskSettings taskSettings) { | ||
| super(model, taskSettings); | ||
|
|
||
| rateLimitServiceSettings = model.rateLimitServiceSettings(); | ||
| apiKey = model.apiKey(); | ||
| // Carry the endpoint over so the copy does not end up with a null URI; subclasses may still reassign it. | ||
| uri = model.uri(); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a copy of {@code model} that uses the given service settings. | ||
| * | ||
| * @param model the model to copy the API key, rate limit settings and URI from | ||
| * @param serviceSettings the service settings of the new model | ||
| */ | ||
| protected MixedbreadModel(MixedbreadModel model, ServiceSettings serviceSettings) { | ||
| super(model, serviceSettings); | ||
|
|
||
| rateLimitServiceSettings = model.rateLimitServiceSettings(); | ||
| apiKey = model.apiKey(); | ||
| // Carry the endpoint over so the copy does not end up with a null URI; subclasses may still reassign it. | ||
| uri = model.uri(); | ||
| } | ||
|
|
||
| /** | ||
| * @return the API key held by this model | ||
| */ | ||
| public SecureString apiKey() { | ||
| return apiKey; | ||
| } | ||
|
|
||
| /** | ||
| * @return the rate limit settings this model was created with; same value as {@link #rateLimitSettings()} | ||
| */ | ||
| public RateLimitSettings rateLimitServiceSettings() { | ||
| return rateLimitServiceSettings; | ||
| } | ||
|
|
||
| /** | ||
| * Creates the executable action for this model; implemented by the concrete model classes. | ||
| * | ||
| * @param creator the visitor handed to the concrete model | ||
| * @param taskSettings task settings as a map (presumably request-level overrides - confirm against callers) | ||
| * @return the action produced for this model | ||
| */ | ||
| public abstract ExecutableAction accept(MixedbreadActionVisitor creator, Map<String, Object> taskSettings); | ||
|
|
||
| /** | ||
| * @return the endpoint URI of this model, or {@code null} if it has not been assigned | ||
| */ | ||
| public URI uri() { | ||
| return uri; | ||
| } | ||
|
|
||
| /** | ||
| * @return the rate limit settings this model was created with | ||
| */ | ||
| @Override | ||
| public RateLimitSettings rateLimitSettings() { | ||
| return rateLimitServiceSettings; | ||
| } | ||
|
|
||
| /** | ||
| * @return the hash code of the API key, so models sharing an API key produce the same grouping hash | ||
| */ | ||
| @Override | ||
| public int rateLimitGroupingHash() { | ||
| return apiKey().hashCode(); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The area should be "Inference" rather than "Machine Learning"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replaced: [ML] -> [Inference API]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's also a good change, but I was referring to the `area` field, which should be `Inference`, not `Machine Learning`.