-
Notifications
You must be signed in to change notification settings - Fork 74
MLE-22706 Added encode/decode support for vectors #1789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
56 changes: 56 additions & 0 deletions
56
marklogic-client-api/src/main/java/com/marklogic/client/util/VectorUtil.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/* | ||
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved. | ||
*/ | ||
package com.marklogic.client.util; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
import java.util.Base64; | ||
|
||
/**
 * Supports encoding and decoding vectors using the same approach as the vec:base64-encode and vec:base64-decode
 * functions supported by the MarkLogic server.
 * <p>
 * Before base64 encoding, the binary layout written and read by this class is little-endian: a 4-byte version
 * (only version 0 is supported), a 4-byte dimension count, and then one 4-byte float per dimension.
 *
 * @since 7.2.0
 */
public interface VectorUtil {

    /**
     * Encodes a vector as a base64 string.
     *
     * @param vector the values to encode; may be empty, must not be null
     * @return a base64-encoded string representing the vector and using the same approach as the vec:base64-encode
     * function supported by the MarkLogic server.
     * @throws NullPointerException     if {@code vector} is null
     * @throws IllegalArgumentException if the vector has too many dimensions for its encoding to fit in one buffer
     */
    static String base64Encode(float... vector) {
        final int dimensions = vector.length;
        // The header is two 4-byte ints: the version followed by the dimension count.
        final int headerBytes = 2 * Integer.BYTES;
        // Guard against int overflow when computing the buffer size.
        if (dimensions > (Integer.MAX_VALUE - headerBytes) / Float.BYTES) {
            throw new IllegalArgumentException("Vector has too many dimensions to encode: " + dimensions);
        }

        ByteBuffer buffer = ByteBuffer.allocate(headerBytes + Float.BYTES * dimensions);
        buffer.order(ByteOrder.LITTLE_ENDIAN);
        buffer.putInt(0); // version
        buffer.putInt(dimensions);
        // The float view starts at the current position and inherits the little-endian order set above.
        buffer.asFloatBuffer().put(vector);
        return Base64.getEncoder().encodeToString(buffer.array());
    }

    /**
     * Decodes a base64 string into a vector.
     *
     * @param encodedVector the base64-encoded vector; must not be null
     * @return a vector represented by the base64-encoded string and using the same approach as the vec:base64-decode
     * function supported by the MarkLogic server.
     * @throws NullPointerException     if {@code encodedVector} is null
     * @throws IllegalArgumentException if the input is not valid base64, is too short to contain the header, declares
     *                                  an unsupported version, or declares a dimension count that is negative or
     *                                  larger than the number of floats actually present
     */
    static float[] base64Decode(String encodedVector) {
        final byte[] bytes = Base64.getDecoder().decode(encodedVector);
        final int headerBytes = 2 * Integer.BYTES;
        if (bytes.length < headerBytes) {
            throw new IllegalArgumentException("Encoded vector is too short; expected at least " + headerBytes +
                " bytes but found " + bytes.length);
        }

        ByteBuffer buffer = ByteBuffer.wrap(bytes);
        buffer.order(ByteOrder.LITTLE_ENDIAN);

        final int version = buffer.getInt();
        if (version != 0) {
            throw new IllegalArgumentException("Unsupported vector version: " + version);
        }

        final int dimensions = buffer.getInt();
        // Validate before allocating so a corrupt header cannot trigger a huge or negative-sized allocation.
        // The multiplication is done as a long so that it cannot overflow.
        if (dimensions < 0 || (long) dimensions * Float.BYTES > buffer.remaining()) {
            throw new IllegalArgumentException("Invalid vector dimensions: " + dimensions +
                "; bytes available for values: " + buffer.remaining());
        }

        float[] vector = new float[dimensions];
        buffer.asFloatBuffer().get(vector);
        return vector;
    }
}
58 changes: 58 additions & 0 deletions
58
marklogic-client-api/src/test/java/com/marklogic/client/util/VectorUtilTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* Copyright (c) 2010-2025 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved. | ||
*/ | ||
package com.marklogic.client.util; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.node.ArrayNode; | ||
import com.marklogic.client.test.Common; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
class VectorUtilTest { | ||
|
||
private final float[] VECTOR = new float[]{3.14f, 1.59f, 2.65f}; | ||
private final double ACCEPTABLE_DELTA = 0.0001; | ||
|
||
@Test | ||
void encodeAndDecodeWithJavaClient() { | ||
String encoded = VectorUtil.base64Encode(VECTOR); | ||
assertEquals("AAAAAAMAAADD9UhAH4XLP5qZKUA=", encoded); | ||
|
||
float[] decoded = VectorUtil.base64Decode(encoded); | ||
assertEquals(VECTOR.length, decoded.length); | ||
for (int i = 0; i < VECTOR.length; i++) { | ||
assertEquals(VECTOR[i], decoded[i], ACCEPTABLE_DELTA); | ||
} | ||
} | ||
|
||
@Test | ||
void encodeAndDecodeWithServer() { | ||
String encoded = VectorUtil.base64Encode(VECTOR); | ||
assertEquals("AAAAAAMAAADD9UhAH4XLP5qZKUA=", encoded); | ||
|
||
ArrayNode decoded = (ArrayNode) Common.newEvalClient().newServerEval() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of casting from Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback |
||
.xquery("vec:base64-decode('%s')".formatted(encoded)) | ||
.evalAs(JsonNode.class); | ||
|
||
assertEquals(3, decoded.size()); | ||
assertEquals(3.14f, decoded.get(0).asDouble(), ACCEPTABLE_DELTA); | ||
assertEquals(1.59f, decoded.get(1).asDouble(), ACCEPTABLE_DELTA); | ||
assertEquals(2.65f, decoded.get(2).asDouble(), ACCEPTABLE_DELTA); | ||
} | ||
|
||
@Test | ||
void encodeWithServerAndDecodeWithJavaClient() { | ||
String encoded = Common.newEvalClient().newServerEval() | ||
.xquery("vec:base64-encode(vec:vector((3.14, 1.59, 2.65)))") | ||
.evalAs(String.class); | ||
assertEquals("AAAAAAMAAADD9UhAH4XLP5qZKUA=", encoded); | ||
|
||
float[] decoded = VectorUtil.base64Decode(encoded); | ||
assertEquals(VECTOR.length, decoded.length); | ||
for (int i = 0; i < VECTOR.length; i++) { | ||
assertEquals(VECTOR[i], decoded[i], ACCEPTABLE_DELTA); | ||
} | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use the same named constant instead of the literal `0` when checking the version to maintain consistency with the encoding logic. Copilot uses AI. Check for mistakes.