From 29f9145687daddf76ef6787ad26406046aba94c2 Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Mon, 30 Jun 2025 21:41:34 +0900 Subject: [PATCH 01/17] Add EmbeddingValue union type and Base64 support for embeddings This implementation adds support for Base64-encoded embeddings as the default response format, while maintaining complete backward compatibility with existing `List<Float>` usage. ### 1. Default Behavior Change - **New Default**: Embedding requests now default to Base64 encoding format - **Backward Compatibility**: Existing code using the `embedding()` method continues to work unchanged - **Performance**: Base64 encoding reduces network payload size significantly Introduces the EmbeddingValue class to support both float list and base64-encoded embedding data, enabling efficient handling and backward compatibility. Embedding, EmbeddingCreateParams, and related classes are updated to use EmbeddingValue, with automatic decoding and encoding between formats. Adds EmbeddingDefaults for global default encoding configuration, and comprehensive tests for new behaviors and compatibility. 
--- .../com/openai/core/EmbeddingDefaults.kt | 1 + .../com/openai/models/embeddings/Embedding.kt | 100 ++++++- .../embeddings/EmbeddingCreateParams.kt | 19 +- .../models/embeddings/EmbeddingDefaults.kt | 57 ++++ .../models/embeddings/EmbeddingValue.kt | 265 ++++++++++++++++++ .../models/embeddings/EmbeddingDebugTest.kt | 62 ++++ .../embeddings/EmbeddingDefaultsManualTest.kt | 49 ++++ .../models/embeddings/EmbeddingStepTest.kt | 58 ++++ .../EmbeddingValueIntegrationTest.kt | 263 +++++++++++++++++ 9 files changed, 862 insertions(+), 12 deletions(-) create mode 100644 openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt create mode 100644 openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt create mode 100644 openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt create mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt create mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt create mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt create mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt diff --git a/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt b/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt @@ -0,0 +1 @@ + diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt index 8192bc190..8ded8a538 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt @@ -22,6 +22,7 @@ import kotlin.jvm.optionals.getOrNull 
class Embedding private constructor( private val embedding: JsonField>, + private val embeddingValue: JsonField?, private val index: JsonField, private val object_: JsonValue, private val additionalProperties: MutableMap, @@ -31,19 +32,52 @@ private constructor( private constructor( @JsonProperty("embedding") @ExcludeMissing - embedding: JsonField> = JsonMissing.of(), + embedding: JsonField = JsonMissing.of(), @JsonProperty("index") @ExcludeMissing index: JsonField = JsonMissing.of(), @JsonProperty("object") @ExcludeMissing object_: JsonValue = JsonMissing.of(), - ) : this(embedding, index, object_, mutableMapOf()) + ) : this( + JsonMissing.of(), // Legacy embedding field will be populated from embeddingValue + embedding, + index, + object_, + mutableMapOf(), + ) /** * The embedding vector, which is a list of floats. The length of vector depends on the model as * listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). * + * Important: When Base64 data is received, it is automatically decoded and returned as + * List + * + * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is + * unexpectedly missing or null (e.g. if the server responded with an unexpected value). + */ + fun embedding(): List = + when { + embeddingValue != null -> + embeddingValue + .getRequired("embedding") + .asFloatList() // Base64→Float auto conversion + !embedding.isMissing() -> + embedding.getRequired("embedding") // Original Float format data + else -> throw OpenAIInvalidDataException("Embedding data is missing") + } + + /** + * The embedding data in its original format (either float list or base64 string). This method + * provides efficient access to the embedding data without unnecessary conversions. + * + * @return EmbeddingValue containing the embedding data in its original format * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is * unexpectedly missing or null (e.g. 
if the server responded with an unexpected value). */ - fun embedding(): List = embedding.getRequired("embedding") + fun embeddingValue(): EmbeddingValue = + when { + embeddingValue != null -> embeddingValue.getRequired("embedding") + !embedding.isMissing() -> EmbeddingValue.ofFloatList(embedding.getRequired("embedding")) + else -> throw OpenAIInvalidDataException("Embedding data is missing") + } /** * The index of the embedding in the list of embeddings. @@ -71,7 +105,15 @@ private constructor( * * Unlike [embedding], this method doesn't throw if the JSON field has an unexpected type. */ - @JsonProperty("embedding") @ExcludeMissing fun _embedding(): JsonField> = embedding + @JsonProperty("embedding") + @ExcludeMissing + fun _embedding(): JsonField = + when { + embeddingValue != null -> embeddingValue + !embedding.isMissing() -> + JsonField.of(EmbeddingValue.ofFloatList(embedding.getRequired("embedding"))) + else -> JsonMissing.of() + } /** * Returns the raw JSON value of [index]. @@ -116,7 +158,12 @@ private constructor( @JvmSynthetic internal fun from(embedding: Embedding) = apply { - this.embedding = embedding.embedding.map { it.toMutableList() } + try { + this.embedding = JsonField.of(embedding.embedding().toMutableList()) + } catch (e: Exception) { + // Fallback to field-level copying if embedding() method fails + this.embedding = embedding.embedding.map { it.toMutableList() } + } index = embedding.index object_ = embedding.object_ additionalProperties = embedding.additionalProperties.toMutableMap() @@ -212,6 +259,7 @@ private constructor( fun build(): Embedding = Embedding( checkRequired("embedding", embedding).map { it.toImmutable() }, + null, // embeddingValue - will be null for builder-created instances checkRequired("index", index), object_, additionalProperties.toMutableMap(), @@ -225,7 +273,7 @@ private constructor( return@apply } - embedding() + embedding() // This will call the method that returns List index() _object_().let { if (it != 
JsonValue.from("embedding")) { @@ -250,7 +298,11 @@ private constructor( */ @JvmSynthetic internal fun validity(): Int = - (embedding.asKnown().getOrNull()?.size ?: 0) + + when { + embeddingValue != null -> embeddingValue.asKnown().getOrNull()?.validity() ?: 0 + !embedding.isMissing() -> embedding.asKnown().getOrNull()?.size ?: 0 + else -> 0 + } + (if (index.asKnown().isPresent) 1 else 0) + object_.let { if (it == JsonValue.from("embedding")) 1 else 0 } @@ -259,15 +311,43 @@ private constructor( return true } - return /* spotless:off */ other is Embedding && embedding == other.embedding && index == other.index && object_ == other.object_ && additionalProperties == other.additionalProperties /* spotless:on */ + if (other !is Embedding) { + return false + } + + return try { + embedding() == other.embedding() && + index == other.index && + object_ == other.object_ && + additionalProperties == other.additionalProperties + } catch (e: Exception) { + // Fallback to field-level comparison if embedding() methods fail + embedding == other.embedding && + embeddingValue == other.embeddingValue && + index == other.index && + object_ == other.object_ && + additionalProperties == other.additionalProperties + } } /* spotless:off */ - private val hashCode: Int by lazy { Objects.hash(embedding, index, object_, additionalProperties) } + private val hashCode: Int by lazy { + try { + Objects.hash(embedding(), index, object_, additionalProperties) + } catch (e: Exception) { + // Fallback to field-level hashing if embedding() method fails + Objects.hash(embedding, embeddingValue, index, object_, additionalProperties) + } + } /* spotless:on */ override fun hashCode(): Int = hashCode override fun toString() = - "Embedding{embedding=$embedding, index=$index, object_=$object_, additionalProperties=$additionalProperties}" + when { + embeddingValue != null -> + "Embedding{embedding=${try { embedding() } catch (e: Exception) { "[]" }}, index=$index, object_=$object_, 
additionalProperties=$additionalProperties}" + else -> + "Embedding{embedding=${embedding.asKnown().getOrNull() ?: emptyList()}, index=$index, object_=$object_, additionalProperties=$additionalProperties}" + } } diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt index 6ea30e456..40b424923 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt @@ -78,6 +78,9 @@ private constructor( * The format to return the embeddings in. Can be either `float` or * [`base64`](https://pypi.org/project/pybase64/). * + * Returns the encoding format that was set (either explicitly or via default) when this + * EmbeddingCreateParams instance was built. + * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the * server responded with an unexpected value). */ @@ -418,12 +421,18 @@ private constructor( * * @throws IllegalStateException if any required field is unset. */ - fun build(): EmbeddingCreateParams = - EmbeddingCreateParams( + fun build(): EmbeddingCreateParams { + // Apply default encoding format if not explicitly set + if (body._encodingFormat().isMissing()) { + body.encodingFormat(EmbeddingDefaults.defaultEncodingFormat) + } + + return EmbeddingCreateParams( body.build(), additionalHeaders.build(), additionalQueryParams.build(), ) + } } fun _body(): Body = body @@ -724,6 +733,12 @@ private constructor( keys.forEach(::removeAdditionalProperty) } + /** + * Internal method to check if encodingFormat has been set. Used by the main Builder to + * determine if default should be applied. + */ + internal fun _encodingFormat(): JsonField = encodingFormat + /** * Returns an immutable instance of [Body]. 
* diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt new file mode 100644 index 000000000..251e4f8a3 --- /dev/null +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt @@ -0,0 +1,57 @@ +// File generated from our OpenAPI spec by Stainless. + +package com.openai.models.embeddings + +import com.openai.models.embeddings.EmbeddingCreateParams.EncodingFormat + +/** + * Configuration object for default embedding behavior. This allows users to change the default + * encoding format globally. + * + * By default, Base64 encoding is used for optimal performance and reduced network bandwidth. Users + * can explicitly choose float encoding when direct float access is needed. + */ +object EmbeddingDefaults { + + @JvmStatic + @get:JvmName("getDefaultEncodingFormat") + @set:JvmName("setDefaultEncodingFormat") + var defaultEncodingFormat: EncodingFormat = EncodingFormat.BASE64 // Default is Base64 + private set + + /** + * Set the default encoding format for embeddings. This will be applied when no explicit format + * is specified in EmbeddingCreateParams. + * + * @param format the encoding format to use as default + */ + @JvmStatic + fun setDefaultEncodingFormat(format: EncodingFormat) { + defaultEncodingFormat = format + } + + /** + * Reset the default encoding format to Base64 (the recommended default). Base64 encoding + * provides better performance and reduced network bandwidth usage. + */ + @JvmStatic + fun resetToDefaults() { + defaultEncodingFormat = EncodingFormat.BASE64 + } + + /** + * Configure the system to use float encoding as default. This is primarily for backward + * compatibility scenarios. Note: Float encoding uses more network bandwidth and may impact + * performance. For most use cases, the default base64 encoding is recommended. 
+ */ + @JvmStatic + fun enableLegacyFloatDefaults() { + defaultEncodingFormat = EncodingFormat.FLOAT + } + + /** Returns true if the current default encoding format is BASE64. */ + @JvmStatic fun isUsingBase64Defaults(): Boolean = defaultEncodingFormat == EncodingFormat.BASE64 + + /** Returns true if the current default encoding format is FLOAT. */ + @JvmStatic fun isUsingFloatDefaults(): Boolean = defaultEncodingFormat == EncodingFormat.FLOAT +} diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt new file mode 100644 index 000000000..acb4e3c97 --- /dev/null +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -0,0 +1,265 @@ +// File generated from our OpenAPI spec by Stainless. + +package com.openai.models.embeddings + +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.core.ObjectCodec +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.SerializerProvider +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.fasterxml.jackson.databind.annotation.JsonSerialize +import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.BaseDeserializer +import com.openai.core.BaseSerializer +import com.openai.core.JsonValue +import com.openai.core.allMaxBy +import com.openai.errors.OpenAIInvalidDataException +import java.nio.ByteBuffer +import java.util.Base64 +import java.util.Objects + +/** + * Represents embedding data that can be either a list of floats or base64-encoded string. This + * union type allows for efficient handling of both formats. + * + * This class is immutable - all instances are thread-safe and cannot be modified after creation. 
+ */ +@JsonDeserialize(using = EmbeddingValue.Deserializer::class) +@JsonSerialize(using = EmbeddingValue.Serializer::class) +class EmbeddingValue +private constructor( + private val floatList: List? = null, + private val base64String: String? = null, + private val _json: JsonValue? = null, +) { + + /** Returns the embedding as a list of floats, or null if this value represents base64 data. */ + fun floatList(): List? = floatList + + /** + * Returns the embedding as a base64-encoded string, or null if this value represents float + * data. + */ + fun base64String(): String? = base64String + + /** Returns true if this value contains float list data. */ + fun isFloatList(): Boolean = floatList != null + + /** Returns true if this value contains base64 string data. */ + fun isBase64String(): Boolean = base64String != null + + /** + * Returns the embedding data as a list of floats. + * + * **Important feature: Automatic Base64 decoding** This method is the core part of backward + * compatibility. When data is stored in Base64 format, it automatically decodes and returns + * List, so existing user code requires no changes. + * + * Processing flow: + * - Float format data → Return as-is + * - Base64 format data → Automatically decode and return as List + * + * @return Decoded embedding data in List format + */ + fun asFloatList(): List = + when { + floatList != null -> floatList + base64String != null -> + decodeBase64ToFloatList(base64String) // Automatic Base64 decoding + else -> throw IllegalStateException("No valid embedding data") + } + + /** + * Returns the embedding data as a base64-encoded string. If the data is a float list, it will + * be encoded automatically. + */ + fun asBase64String(): String = + when { + base64String != null -> base64String + floatList != null -> encodeFloatListToBase64(floatList) + else -> throw IllegalStateException("No valid embedding data") + } + + /** Returns the raw JSON value for debugging purposes. */ + fun _json(): JsonValue? 
= _json + + /** Accepts a visitor that can handle both float list and base64 string cases. */ + fun accept(visitor: Visitor): T = + when { + floatList != null -> visitor.visitFloatList(floatList) + base64String != null -> visitor.visitBase64String(base64String) + else -> visitor.unknown(_json) + } + + /** + * Validates the embedding data without modifying it. No new object is created: the method + * returns this same instance if validation is successful, or throws an exception if validation + * fails. + * + * @return this instance if validation succeeds + * @throws OpenAIInvalidDataException if validation fails + */ + fun validate(): EmbeddingValue { + accept( + object : Visitor { + override fun visitFloatList(floatList: List) { + // Validate that float list is not empty and contains valid values + if (floatList.isEmpty()) { + throw OpenAIInvalidDataException("Float list cannot be empty") + } + floatList.forEach { value -> + if (!value.isFinite()) { + throw OpenAIInvalidDataException("Float values must be finite") + } + } + } + + override fun visitBase64String(base64String: String) { + // Validate base64 format + try { + Base64.getDecoder().decode(base64String) + } catch (e: IllegalArgumentException) { + throw OpenAIInvalidDataException("Invalid base64 string", e) + } + } + } + ) + return this // Return this instance if validation succeeds + } + + fun isValid(): Boolean = + try { + validate() + true + } catch (e: OpenAIInvalidDataException) { + false + } + + override fun equals(other: Any?): Boolean { + if (this === other) return true + return other is EmbeddingValue && + floatList == other.floatList && + base64String == other.base64String + } + + override fun hashCode(): Int = Objects.hash(floatList, base64String) + + override fun toString(): String = + when { + floatList != null -> "EmbeddingValue{floatList=$floatList}" + base64String != null -> "EmbeddingValue{base64String=$base64String}" + _json != null -> "EmbeddingValue{_unknown=$_json}" + else -> throw 
IllegalStateException("Invalid EmbeddingValue") + } + + companion object { + /** + * Creates an EmbeddingValue from a list of floats. The input list is defensively copied to + * ensure immutability. + * + * @param floatList the list of float values (will be copied) + * @return a new immutable EmbeddingValue instance + * @throws OpenAIInvalidDataException if validation fails + */ + @JvmStatic + fun ofFloatList(floatList: List<Float>): EmbeddingValue { + // Defensive copy to ensure immutability + val immutableList = floatList.toList() + val instance = EmbeddingValue(floatList = immutableList) + return instance.validate() // Validate upon creation + } + + /** + * Creates an EmbeddingValue from a base64-encoded string. + * + * @param base64String the base64-encoded string + * @return a new immutable EmbeddingValue instance + * @throws OpenAIInvalidDataException if validation fails + */ + @JvmStatic + fun ofBase64String(base64String: String): EmbeddingValue { + val instance = EmbeddingValue(base64String = base64String) + return instance.validate() // Validate upon creation + } + + /** + * Decodes a base64 string to a list of floats. Assumes the base64 string represents an + * array of 32-bit IEEE 754 floats in little-endian format (set explicitly: ByteBuffer defaults to big-endian). + */ + private fun decodeBase64ToFloatList(base64String: String): List<Float> { + val bytes = Base64.getDecoder().decode(base64String) + val buffer = ByteBuffer.wrap(bytes).order(java.nio.ByteOrder.LITTLE_ENDIAN).asFloatBuffer() + return (0 until buffer.remaining()).map { buffer.get() } + } + + /** + * Encodes a list of floats to a base64 string. Encodes the floats as an array of 32-bit + * IEEE 754 floats in little-endian format (set explicitly: ByteBuffer defaults to big-endian). + */ + private fun encodeFloatListToBase64(floatList: List<Float>): String { + val buffer = ByteBuffer.allocate(floatList.size * 4).order(java.nio.ByteOrder.LITTLE_ENDIAN) + floatList.forEach { buffer.putFloat(it) } + return Base64.getEncoder().encodeToString(buffer.array()) + } + } + + /** Visitor interface for handling different types of embedding data. 
*/ + interface Visitor { + fun visitFloatList(floatList: List): T + + fun visitBase64String(base64String: String): T + + fun unknown(json: JsonValue?): T { + throw OpenAIInvalidDataException("Unknown EmbeddingValue: $json") + } + } + + internal class Deserializer : BaseDeserializer(EmbeddingValue::class) { + override fun ObjectCodec.deserialize(node: JsonNode): EmbeddingValue { + val json = JsonValue.fromJsonNode(node) + + val bestMatches = + sequenceOf( + tryDeserialize(node, jacksonTypeRef>())?.let { + EmbeddingValue(floatList = it, _json = json) + }, + tryDeserialize(node, jacksonTypeRef())?.let { + EmbeddingValue(base64String = it, _json = json) + }, + ) + .filterNotNull() + .allMaxBy { it.validity() } + .toList() + + return when (bestMatches.size) { + 0 -> EmbeddingValue(_json = json) + 1 -> bestMatches.single() + else -> bestMatches.firstOrNull { it.isValid() } ?: bestMatches.first() + } + } + } + + internal class Serializer : BaseSerializer(EmbeddingValue::class) { + override fun serialize( + value: EmbeddingValue, + generator: JsonGenerator, + provider: SerializerProvider, + ) { + when { + value.floatList != null -> generator.writeObject(value.floatList) + value.base64String != null -> generator.writeObject(value.base64String) + value._json != null -> generator.writeObject(value._json) + else -> throw IllegalStateException("Invalid EmbeddingValue") + } + } + } + + /** Returns a score indicating how many valid values are contained in this object. 
*/ + @JvmSynthetic + internal fun validity(): Int = + when { + floatList != null -> floatList.size + base64String != null -> 1 + else -> 0 + } +} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt new file mode 100644 index 000000000..5f3d19455 --- /dev/null +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt @@ -0,0 +1,62 @@ +// Simple debug test for Embedding +package com.openai.models.embeddings + +import com.fasterxml.jackson.module.kotlin.jacksonTypeRef +import com.openai.core.jsonMapper +import org.junit.jupiter.api.Test + +class EmbeddingDebugTest { + + @Test + fun debugEmbeddingCreation() { + println("=== Debug: Creating embedding with builder ===") + + val builder = Embedding.builder() + println("Builder created: $builder") + + builder.addEmbedding(0.0f) + println("After addEmbedding(0.0f): $builder") + + builder.index(0L) + println("After index(0L): $builder") + + val embedding = builder.build() + println("After build(): $embedding") + + try { + val embeddingList = embedding.embedding() + println("embedding.embedding(): $embeddingList") + println("embedding.embedding().size: ${embeddingList.size}") + } catch (e: Exception) { + println("Error calling embedding(): ${e.message}") + e.printStackTrace() + } + + try { + val index = embedding.index() + println("embedding.index(): $index") + } catch (e: Exception) { + println("Error calling index(): ${e.message}") + } + + // Test JSON serialization/deserialization + try { + val jsonMapper = jsonMapper() + val jsonString = jsonMapper.writeValueAsString(embedding) + println("JSON: $jsonString") + + val roundtrippedEmbedding = + jsonMapper.readValue(jsonString, jacksonTypeRef()) + println("Roundtripped: $roundtrippedEmbedding") + + val roundtrippedList = roundtrippedEmbedding.embedding() + println("Roundtripped embedding(): $roundtrippedList") + 
println("Roundtripped size: ${roundtrippedList.size}") + + println("Original equals roundtripped: ${embedding == roundtrippedEmbedding}") + } catch (e: Exception) { + println("Error in JSON roundtrip: ${e.message}") + e.printStackTrace() + } + } +} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt new file mode 100644 index 000000000..3bb6b2a35 --- /dev/null +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt @@ -0,0 +1,49 @@ +package com.openai.models.embeddings + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +/** Manual test for EmbeddingDefaults behavior */ +@DisplayName("EmbeddingDefaults Manual Test") +class EmbeddingDefaultsManualTest { + + @Test + @DisplayName("Manual test for global defaults") + fun testGlobalDefaultsManually() { + println("=== Manual Test ===") + + // Step 1: Check original default + val originalDefault = EmbeddingDefaults.defaultEncodingFormat + println("Original default: $originalDefault") + + // Step 2: Change to FLOAT + EmbeddingDefaults.setDefaultEncodingFormat(EmbeddingCreateParams.EncodingFormat.FLOAT) + val changedDefault = EmbeddingDefaults.defaultEncodingFormat + println("Changed default: $changedDefault") + + // Step 3: Build params without explicit encoding + val params = + EmbeddingCreateParams.builder() + .input("test input") + .model("text-embedding-ada-002") + .build() + + println("Params encoding format: ${params.encodingFormat()}") + println("Is present: ${params.encodingFormat().isPresent}") + if (params.encodingFormat().isPresent) { + println("Value: ${params.encodingFormat().get()}") + } + + // Step 4: Reset to defaults + EmbeddingDefaults.resetToDefaults() + val resetDefault = EmbeddingDefaults.defaultEncodingFormat + println("Reset default: 
$resetDefault") + + // Assertions for verification + assertThat(changedDefault).isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) + assertThat(params.encodingFormat().get()) + .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) + assertThat(resetDefault).isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) + } +} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt new file mode 100644 index 000000000..b3f5c40c8 --- /dev/null +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt @@ -0,0 +1,58 @@ +package com.openai.models.embeddings + +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +/** Step-by-step trace test */ +@DisplayName("Step Test") +class EmbeddingStepTest { + + @Test + @DisplayName("Step 1: Check initial state") + fun step1_checkInitialState() { + println("===== Step 1: Check initial state =====") + val defaultFormat = EmbeddingDefaults.defaultEncodingFormat + println("EmbeddingDefaults.defaultEncodingFormat = $defaultFormat") + println("EncodingFormat.BASE64 = ${EmbeddingCreateParams.EncodingFormat.BASE64}") + println("EncodingFormat.FLOAT = ${EmbeddingCreateParams.EncodingFormat.FLOAT}") + println("Are they equal? 
${defaultFormat == EmbeddingCreateParams.EncodingFormat.BASE64}") + } + + @Test + @DisplayName("Step 2: Check builder creation") + fun step2_checkBuilder() { + println("===== Step 2: Check builder creation =====") + val builder = EmbeddingCreateParams.builder().input("test").model("text-embedding-ada-002") + println("Builder created") + + // Check state before build + println("About to build...") + val params = builder.build() + println("Build completed") + + val encodingFormat = params.encodingFormat() + println("encodingFormat() result: $encodingFormat") + println("isPresent: ${encodingFormat.isPresent}") + if (encodingFormat.isPresent) { + println("Value: ${encodingFormat.get()}") + } + } + + @Test + @DisplayName("Step 3: Explicit Base64 setting") + fun step3_explicitBase64() { + println("===== Step 3: Explicit Base64 setting =====") + val params = + EmbeddingCreateParams.builder() + .input("test") + .model("text-embedding-ada-002") + .encodingFormat(EmbeddingCreateParams.EncodingFormat.BASE64) + .build() + + val encodingFormat = params.encodingFormat() + println("After explicit Base64 setting: $encodingFormat") + if (encodingFormat.isPresent) { + println("Value: ${encodingFormat.get()}") + } + } +} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt new file mode 100644 index 000000000..589024589 --- /dev/null +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt @@ -0,0 +1,263 @@ +package com.openai.models.embeddings + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.DisplayName +import org.junit.jupiter.api.Test + +/** + * Integration test to verify Base64 default functionality and backward compatibility of + * EmbeddingValue. 
Ensures that both existing List usage and new Base64 format work + * correctly. + */ +@DisplayName("EmbeddingValue Integration Test") +class EmbeddingValueIntegrationTest { + + private var originalDefault: EmbeddingCreateParams.EncodingFormat? = null + + @BeforeEach + fun setUp() { + // Save default settings before test + originalDefault = EmbeddingDefaults.defaultEncodingFormat + } + + @AfterEach + fun tearDown() { + // Restore default settings after test + originalDefault?.let { EmbeddingDefaults.setDefaultEncodingFormat(it) } + } + + /** + * Test to confirm that the default encoding format is Base64. In the new implementation, Base64 + * becomes the default for performance improvements. + */ + @Test + @DisplayName("Confirm that default encoding format is Base64") + fun testDefaultEncodingFormatIsBase64() { + assertThat(EmbeddingDefaults.defaultEncodingFormat) + .describedAs("Default encoding format must be Base64") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) + } + + /** + * Test EmbeddingValue creation and format conversion functionality. 
+ * - Creating EmbeddingValue from Float array + * - Converting to Base64 string + * - Creating EmbeddingValue from Base64 string + * - Auto-decode functionality (Base64 → List) + */ + @Test + @DisplayName("Test EmbeddingValue creation and format conversion") + fun testEmbeddingValueCreationAndConversion() { + val floatList = listOf(1.0f, 2.0f, 3.0f, 4.0f) + + // Create EmbeddingValue from Float array + val embeddingFromFloat = EmbeddingValue.ofFloatList(floatList) + assertThat(embeddingFromFloat.isFloatList()) + .describedAs("EmbeddingValue created from Float array must be in Float format") + .isTrue() + assertThat(embeddingFromFloat.asFloatList()) + .describedAs("Float array contents must match") + .isEqualTo(floatList) + + // Test conversion to Base64 + val base64String = embeddingFromFloat.asBase64String() + assertThat(base64String).describedAs("Base64 string must not be empty").isNotEmpty() + + // Create EmbeddingValue from Base64 string + val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64String) + assertThat(embeddingFromBase64.isBase64String()) + .describedAs("EmbeddingValue created from Base64 string must be in Base64 format") + .isTrue() + assertThat(embeddingFromBase64.base64String()) + .describedAs("Base64 string contents must match") + .isEqualTo(base64String) + + // Test auto-decode: Base64 → List + val decodedFloatList = embeddingFromBase64.asFloatList() + assertThat(decodedFloatList) + .describedAs("Decoded Float array must match the original array") + .isEqualTo(floatList) + } + + /** + * Test explicit Base64 encoding specification in EmbeddingCreateParams.Builder. Confirm that + * Base64 format can be explicitly specified using the encodingFormat() method. 
+ */ + @Test + @DisplayName("Test explicit Base64 encoding specification in EmbeddingCreateParams") + fun testEmbeddingCreateParamsBuilderWithBase64Encoding() { + val params = + EmbeddingCreateParams.builder() + .input("test input") + .model("text-embedding-ada-002") + .encodingFormat(encodingFormat = EmbeddingCreateParams.EncodingFormat.BASE64) + .build() + + assertThat(params.encodingFormat()).describedAs("Encoding format must be set").isPresent() + assertThat(params.encodingFormat().get()) + .describedAs("Explicitly specified encoding format must be Base64") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) + } + + /** + * Test default behavior of EmbeddingCreateParams. Confirm that Base64 is used by default when + * encoding format is not explicitly specified. + */ + @Test + @DisplayName("Test EmbeddingCreateParams default behavior") + fun testEmbeddingCreateParamsDefaultBehavior() { + val params = + EmbeddingCreateParams.builder() + .input("test input") + .model("text-embedding-ada-002") + .build() // Do not explicitly specify encoding format + + assertThat(params.encodingFormat()) + .describedAs("Encoding format must be set by default") + .isPresent() + assertThat(params.encodingFormat().get()) + .describedAs("Default encoding format must be Base64") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) + } + + /** + * Test explicit Float format specification for backward compatibility. Confirm that the + * traditional Float format can be explicitly specified using the encodingFormat() method. 
+ */ + @Test + @DisplayName("Test explicit Float format specification for backward compatibility") + fun testEmbeddingCreateParamsFloatCompatibility() { + val params = + EmbeddingCreateParams.builder() + .input("test input") + .model("text-embedding-ada-002") + .encodingFormat(encodingFormat = EmbeddingCreateParams.EncodingFormat.FLOAT) + .build() + + assertThat(params.encodingFormat()).describedAs("Encoding format must be set").isPresent() + assertThat(params.encodingFormat().get()) + .describedAs( + "Explicitly specified encoding format for backward compatibility must be Float" + ) + .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) + } + + /** + * Test EmbeddingDefaults global configuration change functionality. + * - Change default setting to Float + * - Confirm that new default setting is applied + * - Confirm that settings can be reset + */ + @Test + @DisplayName("Test EmbeddingDefaults global configuration change") + fun testEmbeddingDefaultsCanBeChanged() { + val originalDefault = EmbeddingDefaults.defaultEncodingFormat + + try { + // Change default to FLOAT + EmbeddingDefaults.setDefaultEncodingFormat(EmbeddingCreateParams.EncodingFormat.FLOAT) + assertThat(EmbeddingDefaults.defaultEncodingFormat) + .describedAs("Default setting must be changed to FLOAT") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) + + // Test that new instances use the new default + val params = + EmbeddingCreateParams.builder() + .input("test input") + .model("text-embedding-ada-002") + .build() + + // Debug information + println( + "EmbeddingDefaults.defaultEncodingFormat = ${EmbeddingDefaults.defaultEncodingFormat}" + ) + println("params.encodingFormat() = ${params.encodingFormat()}") + println("params.encodingFormat().isPresent = ${params.encodingFormat().isPresent}") + if (params.encodingFormat().isPresent) { + println("params.encodingFormat().get() = ${params.encodingFormat().get()}") + } + + assertThat(params.encodingFormat().get()) + .describedAs("New instances 
must use the changed default setting") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) + + // Test default reset functionality + EmbeddingDefaults.resetToDefaults() + assertThat(EmbeddingDefaults.defaultEncodingFormat) + .describedAs("After reset, Base64 must be returned as default") + .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) + } finally { + // Restore original default setting + EmbeddingDefaults.setDefaultEncodingFormat(originalDefault) + } + } + + /** + * Test EmbeddingValue validation functionality. + * - Validation failure with empty Float array + * - Validation failure with invalid Base64 string + */ + @Test + @DisplayName("Test EmbeddingValue validation functionality") + fun testEmbeddingValueValidation() { + // Test validation success with valid data + val validFloatList = listOf(1.0f, 2.0f, 3.0f) + val validEmbedding = EmbeddingValue.ofFloatList(validFloatList) + + assertThat(validEmbedding.validate()) + .describedAs("Validation with valid data must succeed") + .isNotNull() + .isEqualTo(validEmbedding) + } + + /** + * Test EmbeddingValue visitor pattern implementation. 
+ * - Visitor call for Float array case + * - Visitor call for Base64 string case + */ + @Test + @DisplayName("Test EmbeddingValue visitor pattern") + fun testEmbeddingValueVisitorPattern() { + val floatList = listOf(1.0f, 2.0f, 3.0f) + val embeddingFromFloat = EmbeddingValue.ofFloatList(floatList) + + // Visitor for Float array case + val floatResult = + embeddingFromFloat.accept( + object : EmbeddingValue.Visitor { + override fun visitFloatList(floatList: List): String = "float_visited" + + override fun visitBase64String(base64String: String): String = "base64_visited" + + override fun unknown(json: com.openai.core.JsonValue?): String = + "unknown_visited" + } + ) + + assertThat(floatResult) + .describedAs("For Float array case, visitFloatList must be called") + .isEqualTo("float_visited") + + // Visitor for Base64 case + val base64String = embeddingFromFloat.asBase64String() + val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64String) + + val base64Result = + embeddingFromBase64.accept( + object : EmbeddingValue.Visitor { + override fun visitFloatList(floatList: List): String = "float_visited" + + override fun visitBase64String(base64String: String): String = "base64_visited" + + override fun unknown(json: com.openai.core.JsonValue?): String = + "unknown_visited" + } + ) + + assertThat(base64Result) + .describedAs("For Base64 string case, visitBase64String must be called") + .isEqualTo("base64_visited") + } +} From a7d881be684975e3da6d61d2724def59e7b7169f Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Tue, 1 Jul 2025 07:51:01 +0900 Subject: [PATCH 02/17] Deleted invalid EmbeddingDefaults.kt Deleted invalid EmbeddingDefaults.kt --- .../src/main/kotlin/com/openai/core/EmbeddingDefaults.kt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt diff --git a/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt 
b/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt deleted file mode 100644 index 8b1378917..000000000 --- a/openai-java-core/src/main/kotlin/com/openai/core/EmbeddingDefaults.kt +++ /dev/null @@ -1 +0,0 @@ - From 09bb653a31e6cf89ddac952c776ded8f77903128 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Tue, 22 Jul 2025 09:56:36 -0400 Subject: [PATCH 03/17] fix: actually add system properties --- .../main/kotlin/com/openai/core/ClientOptions.kt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/core/ClientOptions.kt b/openai-java-core/src/main/kotlin/com/openai/core/ClientOptions.kt index cf847f31f..4a5206bf7 100644 --- a/openai-java-core/src/main/kotlin/com/openai/core/ClientOptions.kt +++ b/openai-java-core/src/main/kotlin/com/openai/core/ClientOptions.kt @@ -277,13 +277,17 @@ private constructor( fun timeout(): Timeout = timeout fun fromEnv() = apply { - System.getenv("OPENAI_BASE_URL")?.let { baseUrl(it) } + (System.getProperty("openai.baseUrl") ?: System.getenv("OPENAI_BASE_URL"))?.let { + baseUrl(it) + } - val openAIKey = System.getenv("OPENAI_API_KEY") - val openAIOrgId = System.getenv("OPENAI_ORG_ID") - val openAIProjectId = System.getenv("OPENAI_PROJECT_ID") + val openAIKey = System.getProperty("openai.apiKey") ?: System.getenv("OPENAI_API_KEY") + val openAIOrgId = System.getProperty("openai.orgId") ?: System.getenv("OPENAI_ORG_ID") + val openAIProjectId = + System.getProperty("openai.projectId") ?: System.getenv("OPENAI_PROJECT_ID") val azureOpenAIKey = System.getenv("AZURE_OPENAI_KEY") - val openAIWebhookSecret = System.getenv("OPENAI_WEBHOOK_SECRET") + val openAIWebhookSecret = + System.getProperty("openai.webhookSecret") ?: System.getenv("OPENAI_WEBHOOK_SECRET") if (!openAIWebhookSecret.isNullOrEmpty()) { webhookSecret(openAIWebhookSecret) } From 985fa9a666e341ff4db601bf2bfc5df3664ca4bd Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 
13:16:52 +0900 Subject: [PATCH 04/17] Modified the field name for floats and base64 Modified the field name for floats and base64 --- .../models/embeddings/EmbeddingValue.kt | 84 +++++++++---------- .../EmbeddingValueIntegrationTest.kt | 36 ++++---- 2 files changed, 60 insertions(+), 60 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index acb4e3c97..3f17e0615 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -28,25 +28,25 @@ import java.util.Objects @JsonSerialize(using = EmbeddingValue.Serializer::class) class EmbeddingValue private constructor( - private val floatList: List? = null, - private val base64String: String? = null, + private val floats: List? = null, + private val base64: String? = null, private val _json: JsonValue? = null, ) { /** Returns the embedding as a list of floats, or null if this value represents base64 data. */ - fun floatList(): List? = floatList + fun floatList(): List? = floats /** * Returns the embedding as a base64-encoded string, or null if this value represents float * data. */ - fun base64String(): String? = base64String + fun base64String(): String? = base64 /** Returns true if this value contains float list data. */ - fun isFloatList(): Boolean = floatList != null + fun isFloatList(): Boolean = floats != null /** Returns true if this value contains base64 string data. */ - fun isBase64String(): Boolean = base64String != null + fun isBase64String(): Boolean = base64 != null /** * Returns the embedding data as a list of floats. 
@@ -63,9 +63,9 @@ private constructor( */ fun asFloatList(): List = when { - floatList != null -> floatList - base64String != null -> - decodeBase64ToFloatList(base64String) // Automatic Base64 decoding + floats != null -> floats + base64 != null -> + decodeBase64ToFloatList(base64) // Automatic Base64 decoding else -> throw IllegalStateException("No valid embedding data") } @@ -75,8 +75,8 @@ private constructor( */ fun asBase64String(): String = when { - base64String != null -> base64String - floatList != null -> encodeFloatListToBase64(floatList) + base64 != null -> base64 + floats != null -> encodeFloatListToBase64(floats) else -> throw IllegalStateException("No valid embedding data") } @@ -86,8 +86,8 @@ private constructor( /** Accepts a visitor that can handle both float list and base64 string cases. */ fun accept(visitor: Visitor): T = when { - floatList != null -> visitor.visitFloatList(floatList) - base64String != null -> visitor.visitBase64String(base64String) + floats != null -> visitor.visitFloatList(floats) + base64 != null -> visitor.visitBase64String(base64) else -> visitor.unknown(_json) } @@ -102,22 +102,22 @@ private constructor( fun validate(): EmbeddingValue { accept( object : Visitor { - override fun visitFloatList(floatList: List) { + override fun visitFloatList(floats: List) { // Validate that float list is not empty and contains valid values - if (floatList.isEmpty()) { + if (floats.isEmpty()) { throw OpenAIInvalidDataException("Float list cannot be empty") } - floatList.forEach { value -> + floats.forEach { value -> if (!value.isFinite()) { throw OpenAIInvalidDataException("Float values must be finite") } } } - override fun visitBase64String(base64String: String) { + override fun visitBase64String(base64: String) { // Validate base64 format try { - Base64.getDecoder().decode(base64String) + Base64.getDecoder().decode(base64) } catch (e: IllegalArgumentException) { throw OpenAIInvalidDataException("Invalid base64 string", e) } @@ -138,16 
+138,16 @@ private constructor( override fun equals(other: Any?): Boolean { if (this === other) return true return other is EmbeddingValue && - floatList == other.floatList && - base64String == other.base64String + floats == other.floats && + base64 == other.base64 } - override fun hashCode(): Int = Objects.hash(floatList, base64String) + override fun hashCode(): Int = Objects.hash(floats, base64) override fun toString(): String = when { - floatList != null -> "EmbeddingValue{floatList=$floatList}" - base64String != null -> "EmbeddingValue{base64String=$base64String}" + floats != null -> "EmbeddingValue{floats=$floats}" + base64 != null -> "EmbeddingValue{base64=$base64}" _json != null -> "EmbeddingValue{_unknown=$_json}" else -> throw IllegalStateException("Invalid EmbeddingValue") } @@ -157,15 +157,15 @@ private constructor( * Creates an EmbeddingValue from a list of floats. The input list is defensively copied to * ensure immutability. * - * @param floatList the list of float values (will be copied) + * @param floats the list of float values (will be copied) * @return a new immutable EmbeddingValue instance * @throws OpenAIInvalidDataException if validation fails */ @JvmStatic - fun ofFloatList(floatList: List): EmbeddingValue { + fun ofFloatList(floats: List): EmbeddingValue { // Defensive copy to ensure immutability - val immutableList = floatList.toList() - val instance = EmbeddingValue(floatList = immutableList) + val immutableList = floats.toList() + val instance = EmbeddingValue(floats = immutableList) return instance.validate() // Validate upon creation } @@ -177,8 +177,8 @@ private constructor( * @throws OpenAIInvalidDataException if validation fails */ @JvmStatic - fun ofBase64String(base64String: String): EmbeddingValue { - val instance = EmbeddingValue(base64String = base64String) + fun ofBase64String(base64: String): EmbeddingValue { + val instance = EmbeddingValue(base64 = base64) return instance.validate() // Validate upon creation } @@ -186,8 
+186,8 @@ private constructor( * Decodes a base64 string to a list of floats. Assumes the base64 string represents an * array of 32-bit IEEE 754 floats in little-endian format. */ - private fun decodeBase64ToFloatList(base64String: String): List { - val bytes = Base64.getDecoder().decode(base64String) + private fun decodeBase64ToFloatList(base64: String): List { + val bytes = Base64.getDecoder().decode(base64) val buffer = ByteBuffer.wrap(bytes).asFloatBuffer() return (0 until buffer.remaining()).map { buffer.get() } } @@ -196,18 +196,18 @@ private constructor( * Encodes a list of floats to a base64 string. Encodes the floats as an array of 32-bit * IEEE 754 floats in little-endian format. */ - private fun encodeFloatListToBase64(floatList: List): String { - val buffer = ByteBuffer.allocate(floatList.size * 4) - floatList.forEach { buffer.putFloat(it) } + private fun encodeFloatListToBase64(floats: List): String { + val buffer = ByteBuffer.allocate(floats.size * 4) + floats.forEach { buffer.putFloat(it) } return Base64.getEncoder().encodeToString(buffer.array()) } } /** Visitor interface for handling different types of embedding data. 
*/ interface Visitor { - fun visitFloatList(floatList: List): T + fun visitFloatList(floats: List): T - fun visitBase64String(base64String: String): T + fun visitBase64String(base64: String): T fun unknown(json: JsonValue?): T { throw OpenAIInvalidDataException("Unknown EmbeddingValue: $json") @@ -221,10 +221,10 @@ private constructor( val bestMatches = sequenceOf( tryDeserialize(node, jacksonTypeRef>())?.let { - EmbeddingValue(floatList = it, _json = json) + EmbeddingValue(floats = it, _json = json) }, tryDeserialize(node, jacksonTypeRef())?.let { - EmbeddingValue(base64String = it, _json = json) + EmbeddingValue(base64 = it, _json = json) }, ) .filterNotNull() @@ -246,8 +246,8 @@ private constructor( provider: SerializerProvider, ) { when { - value.floatList != null -> generator.writeObject(value.floatList) - value.base64String != null -> generator.writeObject(value.base64String) + value.floats != null -> generator.writeObject(value.floats) + value.base64 != null -> generator.writeObject(value.base64) value._json != null -> generator.writeObject(value._json) else -> throw IllegalStateException("Invalid EmbeddingValue") } @@ -258,8 +258,8 @@ private constructor( @JvmSynthetic internal fun validity(): Int = when { - floatList != null -> floatList.size - base64String != null -> 1 + floats != null -> floats.size + base64 != null -> 1 else -> 0 } } diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt index 589024589..a10b602f6 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt @@ -50,20 +50,20 @@ class EmbeddingValueIntegrationTest { @Test @DisplayName("Test EmbeddingValue creation and format conversion") fun testEmbeddingValueCreationAndConversion() { - val 
floatList = listOf(1.0f, 2.0f, 3.0f, 4.0f) + val floats = listOf(1.0f, 2.0f, 3.0f, 4.0f) // Create EmbeddingValue from Float array - val embeddingFromFloat = EmbeddingValue.ofFloatList(floatList) + val embeddingFromFloat = EmbeddingValue.ofFloatList(floats) assertThat(embeddingFromFloat.isFloatList()) .describedAs("EmbeddingValue created from Float array must be in Float format") .isTrue() assertThat(embeddingFromFloat.asFloatList()) .describedAs("Float array contents must match") - .isEqualTo(floatList) + .isEqualTo(floats) // Test conversion to Base64 - val base64String = embeddingFromFloat.asBase64String() - assertThat(base64String).describedAs("Base64 string must not be empty").isNotEmpty() + val base64 = embeddingFromFloat.asBase64String() + assertThat(base64).describedAs("Base64 string must not be empty").isNotEmpty() // Create EmbeddingValue from Base64 string val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64String) @@ -72,13 +72,13 @@ class EmbeddingValueIntegrationTest { .isTrue() assertThat(embeddingFromBase64.base64String()) .describedAs("Base64 string contents must match") - .isEqualTo(base64String) + .isEqualTo(base64) // Test auto-decode: Base64 → List - val decodedFloatList = embeddingFromBase64.asFloatList() - assertThat(decodedFloatList) + val decodedFloats = embeddingFromBase64.asFloatList() + assertThat(decodedFloats) .describedAs("Decoded Float array must match the original array") - .isEqualTo(floatList) + .isEqualTo(floats) } /** @@ -203,8 +203,8 @@ class EmbeddingValueIntegrationTest { @DisplayName("Test EmbeddingValue validation functionality") fun testEmbeddingValueValidation() { // Test validation success with valid data - val validFloatList = listOf(1.0f, 2.0f, 3.0f) - val validEmbedding = EmbeddingValue.ofFloatList(validFloatList) + val validFloats = listOf(1.0f, 2.0f, 3.0f) + val validEmbedding = EmbeddingValue.ofFloatList(validFloats) assertThat(validEmbedding.validate()) .describedAs("Validation with valid data must 
succeed") @@ -220,16 +220,16 @@ class EmbeddingValueIntegrationTest { @Test @DisplayName("Test EmbeddingValue visitor pattern") fun testEmbeddingValueVisitorPattern() { - val floatList = listOf(1.0f, 2.0f, 3.0f) - val embeddingFromFloat = EmbeddingValue.ofFloatList(floatList) + val floats = listOf(1.0f, 2.0f, 3.0f) + val embeddingFromFloat = EmbeddingValue.ofFloatList(floats) // Visitor for Float array case val floatResult = embeddingFromFloat.accept( object : EmbeddingValue.Visitor { - override fun visitFloatList(floatList: List): String = "float_visited" + override fun visitFloatList(floats: List): String = "float_visited" - override fun visitBase64String(base64String: String): String = "base64_visited" + override fun visitBase64String(base64: String): String = "base64_visited" override fun unknown(json: com.openai.core.JsonValue?): String = "unknown_visited" @@ -241,13 +241,13 @@ class EmbeddingValueIntegrationTest { .isEqualTo("float_visited") // Visitor for Base64 case - val base64String = embeddingFromFloat.asBase64String() - val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64String) + val base64 = embeddingFromFloat.asBase64String() + val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64) val base64Result = embeddingFromBase64.accept( object : EmbeddingValue.Visitor { - override fun visitFloatList(floatList: List): String = "float_visited" + override fun visitFloatList(floats: List): String = "float_visited" override fun visitBase64String(base64String: String): String = "base64_visited" From 4b66e6310de87247c80f7c2b8375cabb283a39ef Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 13:17:12 +0900 Subject: [PATCH 05/17] Deleted DebugTest Deleted DebugTest --- .../models/embeddings/EmbeddingDebugTest.kt | 62 ------------------- 1 file changed, 62 deletions(-) delete mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt diff --git 
a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt deleted file mode 100644 index 5f3d19455..000000000 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDebugTest.kt +++ /dev/null @@ -1,62 +0,0 @@ -// Simple debug test for Embedding -package com.openai.models.embeddings - -import com.fasterxml.jackson.module.kotlin.jacksonTypeRef -import com.openai.core.jsonMapper -import org.junit.jupiter.api.Test - -class EmbeddingDebugTest { - - @Test - fun debugEmbeddingCreation() { - println("=== Debug: Creating embedding with builder ===") - - val builder = Embedding.builder() - println("Builder created: $builder") - - builder.addEmbedding(0.0f) - println("After addEmbedding(0.0f): $builder") - - builder.index(0L) - println("After index(0L): $builder") - - val embedding = builder.build() - println("After build(): $embedding") - - try { - val embeddingList = embedding.embedding() - println("embedding.embedding(): $embeddingList") - println("embedding.embedding().size: ${embeddingList.size}") - } catch (e: Exception) { - println("Error calling embedding(): ${e.message}") - e.printStackTrace() - } - - try { - val index = embedding.index() - println("embedding.index(): $index") - } catch (e: Exception) { - println("Error calling index(): ${e.message}") - } - - // Test JSON serialization/deserialization - try { - val jsonMapper = jsonMapper() - val jsonString = jsonMapper.writeValueAsString(embedding) - println("JSON: $jsonString") - - val roundtrippedEmbedding = - jsonMapper.readValue(jsonString, jacksonTypeRef()) - println("Roundtripped: $roundtrippedEmbedding") - - val roundtrippedList = roundtrippedEmbedding.embedding() - println("Roundtripped embedding(): $roundtrippedList") - println("Roundtripped size: ${roundtrippedList.size}") - - println("Original equals roundtripped: ${embedding == roundtrippedEmbedding}") - } catch (e: 
Exception) { - println("Error in JSON roundtrip: ${e.message}") - e.printStackTrace() - } - } -} From e85a5a911f6f2c7aede07994538713b55d80877a Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 13:33:05 +0900 Subject: [PATCH 06/17] Modified validate method and refactored apply scope function * validate() methods no longer validate anything beyond the "shape" of the data being correct * The code has been refactored to use Kotlin's apply scope function for improved conciseness and consistency. --- .../models/embeddings/EmbeddingValue.kt | 48 ++++++------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index 3f17e0615..9fc17f967 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -101,27 +101,9 @@ private constructor( */ fun validate(): EmbeddingValue { accept( - object : Visitor { - override fun visitFloatList(floats: List) { - // Validate that float list is not empty and contains valid values - if (floats.isEmpty()) { - throw OpenAIInvalidDataException("Float list cannot be empty") - } - floats.forEach { value -> - if (!value.isFinite()) { - throw OpenAIInvalidDataException("Float values must be finite") - } - } - } - - override fun visitBase64String(base64: String) { - // Validate base64 format - try { - Base64.getDecoder().decode(base64) - } catch (e: IllegalArgumentException) { - throw OpenAIInvalidDataException("Invalid base64 string", e) - } - } + object : Visitor { + override fun visitFloatList(floatList: List) {} + override fun visitBase64String(base64String: String) {} } ) return this // Return this instance if validation succeeds @@ -163,10 +145,7 @@ private constructor( */ @JvmStatic fun ofFloatList(floats: List): EmbeddingValue { - // Defensive 
copy to ensure immutability - val immutableList = floats.toList() - val instance = EmbeddingValue(floats = immutableList) - return instance.validate() // Validate upon creation + return EmbeddingValue(floats = floats.toList()).apply { validate() } } /** @@ -178,8 +157,7 @@ private constructor( */ @JvmStatic fun ofBase64String(base64: String): EmbeddingValue { - val instance = EmbeddingValue(base64 = base64) - return instance.validate() // Validate upon creation + return EmbeddingValue(base64 = base64).apply { validate() } } /** @@ -187,9 +165,11 @@ private constructor( * array of 32-bit IEEE 754 floats in little-endian format. */ private fun decodeBase64ToFloatList(base64: String): List { - val bytes = Base64.getDecoder().decode(base64) - val buffer = ByteBuffer.wrap(bytes).asFloatBuffer() - return (0 until buffer.remaining()).map { buffer.get() } + return Base64.getDecoder().decode(base64).let { bytes -> + ByteBuffer.wrap(bytes).asFloatBuffer().let { buffer -> + (0 until buffer.remaining()).map { buffer.get() } + } + } } /** @@ -197,9 +177,11 @@ private constructor( * IEEE 754 floats in little-endian format. 
*/ private fun encodeFloatListToBase64(floats: List): String { - val buffer = ByteBuffer.allocate(floats.size * 4) - floats.forEach { buffer.putFloat(it) } - return Base64.getEncoder().encodeToString(buffer.array()) + return ByteBuffer.allocate(floats.size * 4).apply { + floats.forEach { putFloat(it) } + }.array().let { bytes -> + Base64.getEncoder().encodeToString(bytes) + } } } From 9adf9d6996bef9f4a5c4e15b415f0431c922cf8e Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 14:15:05 +0900 Subject: [PATCH 07/17] Modified default encoding Modified default encoding --- .../embeddings/EmbeddingCreateParams.kt | 16 +-- plan2.md | 108 ++++++++++++++++++ 2 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 plan2.md diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt index 40b424923..696e4c526 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt @@ -159,6 +159,7 @@ private constructor( private var body: Body.Builder = Body.builder() private var additionalHeaders: Headers.Builder = Headers.builder() private var additionalQueryParams: QueryParams.Builder = QueryParams.builder() + private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) @JvmSynthetic internal fun from(embeddingCreateParams: EmbeddingCreateParams) = apply { @@ -262,7 +263,7 @@ private constructor( * [`base64`](https://pypi.org/project/pybase64/). */ fun encodingFormat(encodingFormat: EncodingFormat) = apply { - body.encodingFormat(encodingFormat) + this.encodingFormat = JsonField.of(encodingFormat) } /** @@ -273,7 +274,7 @@ private constructor( * supported value. 
*/ fun encodingFormat(encodingFormat: JsonField) = apply { - body.encodingFormat(encodingFormat) + this.encodingFormat = encodingFormat } /** @@ -422,11 +423,6 @@ private constructor( * @throws IllegalStateException if any required field is unset. */ fun build(): EmbeddingCreateParams { - // Apply default encoding format if not explicitly set - if (body._encodingFormat().isMissing()) { - body.encodingFormat(EmbeddingDefaults.defaultEncodingFormat) - } - return EmbeddingCreateParams( body.build(), additionalHeaders.build(), @@ -733,12 +729,6 @@ private constructor( keys.forEach(::removeAdditionalProperty) } - /** - * Internal method to check if encodingFormat has been set. Used by the main Builder to - * determine if default should be applied. - */ - internal fun _encodingFormat(): JsonField = encodingFormat - /** * Returns an immutable instance of [Body]. * diff --git a/plan2.md b/plan2.md new file mode 100644 index 000000000..0b5b12fdc --- /dev/null +++ b/plan2.md @@ -0,0 +1,108 @@ +# Proposed Fixes for TomerAberbach's Comment + +## Comment Overview + +TomerAberbach suggested the following changes: + +1. Avoid applying the default encoding format in the current method. +2. Update the builder field to initialize directly with the default value: + + ```kotlin + private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) + ``` + +3. Remove the new internal method introduced for handling the default encoding format. + +## Proposed Solutions + +### Solution 1: Direct Field Initialization + +**Overview**: +Initialize the `encodingFormat` field in the builder directly with the default value. + +**Changes**: + +- Update the builder field: + + ```kotlin + private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) + ``` + +- Remove the internal method `_encodingFormat()`. + +**Advantages**: + +- Simplifies the code. +- Avoids redundant method calls. +- Aligns with the suggested approach. 
+ +**Disadvantages**: + +- Requires careful testing to ensure backward compatibility. + +### Solution 2: Lazy Initialization + +**Overview**: +Use lazy initialization for the `encodingFormat` field to set the default value only when accessed. + +**Changes**: + +- Update the builder field: + + ```kotlin + private var encodingFormat: JsonField by lazy { + JsonField.of(EmbeddingDefaults.defaultEncodingFormat) + } + ``` + +- Remove the internal method `_encodingFormat()`. + +**Advantages**: + +- Ensures the default value is only set when needed. +- Reduces memory usage for unused fields. + +**Disadvantages**: + +- Slightly increases complexity. +- Lazy initialization may introduce subtle bugs if not handled properly. + +### Solution 3: Default Value in Constructor + +**Overview**: + +Set the default value for `encodingFormat` in the constructor of the builder. + +**Changes**: + +- Update the builder constructor: + + ```kotlin + class Builder internal constructor() { + private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) + // ...existing code... + } + ``` + +- Remove the internal method `_encodingFormat()`. + +**Advantages**: + +- Ensures the default value is set during object creation. +- Simplifies field initialization. + +**Disadvantages**: + +- Requires changes to the constructor logic. +- May affect existing tests relying on the previous initialization method. 
+ +## Summary + +| Solution | Advantages | Disadvantages | +|----------|------------|----------------| +| Direct Field Initialization | Simplifies code, avoids redundancy | Requires careful testing | +| Lazy Initialization | Reduces memory usage, sets default only when needed | Slightly complex, potential bugs | +| Default Value in Constructor | Ensures default during creation, simplifies initialization | Changes constructor logic, affects tests | + +**Recommendation**: +Solution 1 (Direct Field Initialization) is recommended for its simplicity and alignment with the suggested approach. It avoids unnecessary complexity while ensuring the default value is set correctly. From 31a235620471d6c5d6dbb8b0b7549dd55170a94f Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 15:12:34 +0900 Subject: [PATCH 08/17] Modified the implementation of default encoding Modified the implementation of default encoding --- .../embeddings/EmbeddingCreateParams.kt | 2 +- .../models/embeddings/EmbeddingDefaults.kt | 57 ------------- .../embeddings/EmbeddingDefaultsManualTest.kt | 49 ------------ .../models/embeddings/EmbeddingStepTest.kt | 10 ++- .../EmbeddingValueIntegrationTest.kt | 80 +------------------ 5 files changed, 9 insertions(+), 189 deletions(-) delete mode 100644 openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt delete mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt index 696e4c526..dbc37f78c 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt @@ -159,7 +159,7 @@ private constructor( private var body: Body.Builder = Body.builder() private var 
additionalHeaders: Headers.Builder = Headers.builder() private var additionalQueryParams: QueryParams.Builder = QueryParams.builder() - private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) + private var encodingFormat: JsonField = JsonField.of(EncodingFormat.BASE64) @JvmSynthetic internal fun from(embeddingCreateParams: EmbeddingCreateParams) = apply { diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt deleted file mode 100644 index 251e4f8a3..000000000 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingDefaults.kt +++ /dev/null @@ -1,57 +0,0 @@ -// File generated from our OpenAPI spec by Stainless. - -package com.openai.models.embeddings - -import com.openai.models.embeddings.EmbeddingCreateParams.EncodingFormat - -/** - * Configuration object for default embedding behavior. This allows users to change the default - * encoding format globally. - * - * By default, Base64 encoding is used for optimal performance and reduced network bandwidth. Users - * can explicitly choose float encoding when direct float access is needed. - */ -object EmbeddingDefaults { - - @JvmStatic - @get:JvmName("getDefaultEncodingFormat") - @set:JvmName("setDefaultEncodingFormat") - var defaultEncodingFormat: EncodingFormat = EncodingFormat.BASE64 // Default is Base64 - private set - - /** - * Set the default encoding format for embeddings. This will be applied when no explicit format - * is specified in EmbeddingCreateParams. - * - * @param format the encoding format to use as default - */ - @JvmStatic - fun setDefaultEncodingFormat(format: EncodingFormat) { - defaultEncodingFormat = format - } - - /** - * Reset the default encoding format to Base64 (the recommended default). Base64 encoding - * provides better performance and reduced network bandwidth usage. 
- */ - @JvmStatic - fun resetToDefaults() { - defaultEncodingFormat = EncodingFormat.BASE64 - } - - /** - * Configure the system to use float encoding as default. This is primarily for backward - * compatibility scenarios. Note: Float encoding uses more network bandwidth and may impact - * performance. For most use cases, the default base64 encoding is recommended. - */ - @JvmStatic - fun enableLegacyFloatDefaults() { - defaultEncodingFormat = EncodingFormat.FLOAT - } - - /** Returns true if the current default encoding format is BASE64. */ - @JvmStatic fun isUsingBase64Defaults(): Boolean = defaultEncodingFormat == EncodingFormat.BASE64 - - /** Returns true if the current default encoding format is FLOAT. */ - @JvmStatic fun isUsingFloatDefaults(): Boolean = defaultEncodingFormat == EncodingFormat.FLOAT -} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt deleted file mode 100644 index 3bb6b2a35..000000000 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingDefaultsManualTest.kt +++ /dev/null @@ -1,49 +0,0 @@ -package com.openai.models.embeddings - -import org.assertj.core.api.Assertions.assertThat -import org.junit.jupiter.api.DisplayName -import org.junit.jupiter.api.Test - -/** Manual test for EmbeddingDefaults behavior */ -@DisplayName("EmbeddingDefaults Manual Test") -class EmbeddingDefaultsManualTest { - - @Test - @DisplayName("Manual test for global defaults") - fun testGlobalDefaultsManually() { - println("=== Manual Test ===") - - // Step 1: Check original default - val originalDefault = EmbeddingDefaults.defaultEncodingFormat - println("Original default: $originalDefault") - - // Step 2: Change to FLOAT - EmbeddingDefaults.setDefaultEncodingFormat(EmbeddingCreateParams.EncodingFormat.FLOAT) - val changedDefault = EmbeddingDefaults.defaultEncodingFormat - println("Changed default: 
$changedDefault") - - // Step 3: Build params without explicit encoding - val params = - EmbeddingCreateParams.builder() - .input("test input") - .model("text-embedding-ada-002") - .build() - - println("Params encoding format: ${params.encodingFormat()}") - println("Is present: ${params.encodingFormat().isPresent}") - if (params.encodingFormat().isPresent) { - println("Value: ${params.encodingFormat().get()}") - } - - // Step 4: Reset to defaults - EmbeddingDefaults.resetToDefaults() - val resetDefault = EmbeddingDefaults.defaultEncodingFormat - println("Reset default: $resetDefault") - - // Assertions for verification - assertThat(changedDefault).isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) - assertThat(params.encodingFormat().get()) - .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) - assertThat(resetDefault).isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) - } -} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt index b3f5c40c8..e2f3ab856 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt @@ -11,11 +11,15 @@ class EmbeddingStepTest { @DisplayName("Step 1: Check initial state") fun step1_checkInitialState() { println("===== Step 1: Check initial state =====") - val defaultFormat = EmbeddingDefaults.defaultEncodingFormat - println("EmbeddingDefaults.defaultEncodingFormat = $defaultFormat") + val params = + EmbeddingCreateParams.builder().input("test").model("text-embedding-ada-002").build() + val defaultFormat = params.encodingFormat().orElse(null) + println("Default encodingFormat in params = $defaultFormat") println("EncodingFormat.BASE64 = ${EmbeddingCreateParams.EncodingFormat.BASE64}") println("EncodingFormat.FLOAT = ${EmbeddingCreateParams.EncodingFormat.FLOAT}") - println("Are 
they equal? ${defaultFormat == EmbeddingCreateParams.EncodingFormat.BASE64}") + println( + "Is default BASE64? ${defaultFormat == EmbeddingCreateParams.EncodingFormat.BASE64}" + ) } @Test diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt index a10b602f6..066c7b45b 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt @@ -1,8 +1,6 @@ package com.openai.models.embeddings import org.assertj.core.api.Assertions.assertThat -import org.junit.jupiter.api.AfterEach -import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.DisplayName import org.junit.jupiter.api.Test @@ -14,32 +12,6 @@ import org.junit.jupiter.api.Test @DisplayName("EmbeddingValue Integration Test") class EmbeddingValueIntegrationTest { - private var originalDefault: EmbeddingCreateParams.EncodingFormat? = null - - @BeforeEach - fun setUp() { - // Save default settings before test - originalDefault = EmbeddingDefaults.defaultEncodingFormat - } - - @AfterEach - fun tearDown() { - // Restore default settings after test - originalDefault?.let { EmbeddingDefaults.setDefaultEncodingFormat(it) } - } - - /** - * Test to confirm that the default encoding format is Base64. In the new implementation, Base64 - * becomes the default for performance improvements. - */ - @Test - @DisplayName("Confirm that default encoding format is Base64") - fun testDefaultEncodingFormatIsBase64() { - assertThat(EmbeddingDefaults.defaultEncodingFormat) - .describedAs("Default encoding format must be Base64") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) - } - /** * Test EmbeddingValue creation and format conversion functionality. 
* - Creating EmbeddingValue from Float array @@ -66,7 +38,7 @@ class EmbeddingValueIntegrationTest { assertThat(base64).describedAs("Base64 string must not be empty").isNotEmpty() // Create EmbeddingValue from Base64 string - val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64String) + val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64) assertThat(embeddingFromBase64.isBase64String()) .describedAs("EmbeddingValue created from Base64 string must be in Base64 format") .isTrue() @@ -144,56 +116,6 @@ class EmbeddingValueIntegrationTest { .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) } - /** - * Test EmbeddingDefaults global configuration change functionality. - * - Change default setting to Float - * - Confirm that new default setting is applied - * - Confirm that settings can be reset - */ - @Test - @DisplayName("Test EmbeddingDefaults global configuration change") - fun testEmbeddingDefaultsCanBeChanged() { - val originalDefault = EmbeddingDefaults.defaultEncodingFormat - - try { - // Change default to FLOAT - EmbeddingDefaults.setDefaultEncodingFormat(EmbeddingCreateParams.EncodingFormat.FLOAT) - assertThat(EmbeddingDefaults.defaultEncodingFormat) - .describedAs("Default setting must be changed to FLOAT") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) - - // Test that new instances use the new default - val params = - EmbeddingCreateParams.builder() - .input("test input") - .model("text-embedding-ada-002") - .build() - - // Debug information - println( - "EmbeddingDefaults.defaultEncodingFormat = ${EmbeddingDefaults.defaultEncodingFormat}" - ) - println("params.encodingFormat() = ${params.encodingFormat()}") - println("params.encodingFormat().isPresent = ${params.encodingFormat().isPresent}") - if (params.encodingFormat().isPresent) { - println("params.encodingFormat().get() = ${params.encodingFormat().get()}") - } - - assertThat(params.encodingFormat().get()) - .describedAs("New instances must use the changed default 
setting") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) - - // Test default reset functionality - EmbeddingDefaults.resetToDefaults() - assertThat(EmbeddingDefaults.defaultEncodingFormat) - .describedAs("After reset, Base64 must be returned as default") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) - } finally { - // Restore original default setting - EmbeddingDefaults.setDefaultEncodingFormat(originalDefault) - } - } - /** * Test EmbeddingValue validation functionality. * - Validation failure with empty Float array From 87510882d485e04891d93b455ab00d6e7b7efe45 Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 15:13:07 +0900 Subject: [PATCH 09/17] Modified format Modified format --- .../models/embeddings/EmbeddingValue.kt | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index 9fc17f967..d10c304c4 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -64,8 +64,7 @@ private constructor( fun asFloatList(): List = when { floats != null -> floats - base64 != null -> - decodeBase64ToFloatList(base64) // Automatic Base64 decoding + base64 != null -> decodeBase64ToFloatList(base64) // Automatic Base64 decoding else -> throw IllegalStateException("No valid embedding data") } @@ -101,8 +100,9 @@ private constructor( */ fun validate(): EmbeddingValue { accept( - object : Visitor { + object : Visitor { override fun visitFloatList(floatList: List) {} + override fun visitBase64String(base64String: String) {} } ) @@ -119,9 +119,7 @@ private constructor( override fun equals(other: Any?): Boolean { if (this === other) return true - return other is EmbeddingValue && - floats == other.floats && - base64 == other.base64 + 
return other is EmbeddingValue && floats == other.floats && base64 == other.base64 } override fun hashCode(): Int = Objects.hash(floats, base64) @@ -177,11 +175,10 @@ private constructor( * IEEE 754 floats in little-endian format. */ private fun encodeFloatListToBase64(floats: List): String { - return ByteBuffer.allocate(floats.size * 4).apply { - floats.forEach { putFloat(it) } - }.array().let { bytes -> - Base64.getEncoder().encodeToString(bytes) - } + return ByteBuffer.allocate(floats.size * 4) + .apply { floats.forEach { putFloat(it) } } + .array() + .let { bytes -> Base64.getEncoder().encodeToString(bytes) } } } From 6c72da52b947b822b8d533b4aa079d40d30613b1 Mon Sep 17 00:00:00 2001 From: Yoshio Terada Date: Wed, 23 Jul 2025 15:19:39 +0900 Subject: [PATCH 10/17] Delete plan2.md Delete plan2.md --- plan2.md | 108 ------------------------------------------------------- 1 file changed, 108 deletions(-) delete mode 100644 plan2.md diff --git a/plan2.md b/plan2.md deleted file mode 100644 index 0b5b12fdc..000000000 --- a/plan2.md +++ /dev/null @@ -1,108 +0,0 @@ -# Proposed Fixes for TomerAberbach's Comment - -## Comment Overview - -TomerAberbach suggested the following changes: - -1. Avoid applying the default encoding format in the current method. -2. Update the builder field to initialize directly with the default value: - - ```kotlin - private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) - ``` - -3. Remove the new internal method introduced for handling the default encoding format. - -## Proposed Solutions - -### Solution 1: Direct Field Initialization - -**Overview**: -Initialize the `encodingFormat` field in the builder directly with the default value. - -**Changes**: - -- Update the builder field: - - ```kotlin - private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) - ``` - -- Remove the internal method `_encodingFormat()`. - -**Advantages**: - -- Simplifies the code. 
-- Avoids redundant method calls. -- Aligns with the suggested approach. - -**Disadvantages**: - -- Requires careful testing to ensure backward compatibility. - -### Solution 2: Lazy Initialization - -**Overview**: -Use lazy initialization for the `encodingFormat` field to set the default value only when accessed. - -**Changes**: - -- Update the builder field: - - ```kotlin - private var encodingFormat: JsonField by lazy { - JsonField.of(EmbeddingDefaults.defaultEncodingFormat) - } - ``` - -- Remove the internal method `_encodingFormat()`. - -**Advantages**: - -- Ensures the default value is only set when needed. -- Reduces memory usage for unused fields. - -**Disadvantages**: - -- Slightly increases complexity. -- Lazy initialization may introduce subtle bugs if not handled properly. - -### Solution 3: Default Value in Constructor - -**Overview**: - -Set the default value for `encodingFormat` in the constructor of the builder. - -**Changes**: - -- Update the builder constructor: - - ```kotlin - class Builder internal constructor() { - private var encodingFormat: JsonField = JsonField.of(EmbeddingDefaults.defaultEncodingFormat) - // ...existing code... - } - ``` - -- Remove the internal method `_encodingFormat()`. - -**Advantages**: - -- Ensures the default value is set during object creation. -- Simplifies field initialization. - -**Disadvantages**: - -- Requires changes to the constructor logic. -- May affect existing tests relying on the previous initialization method. 
- -## Summary - -| Solution | Advantages | Disadvantages | -|----------|------------|----------------| -| Direct Field Initialization | Simplifies code, avoids redundancy | Requires careful testing | -| Lazy Initialization | Reduces memory usage, sets default only when needed | Slightly complex, potential bugs | -| Default Value in Constructor | Ensures default during creation, simplifies initialization | Changes constructor logic, affects tests | - -**Recommendation**: -Solution 1 (Direct Field Initialization) is recommended for its simplicity and alignment with the suggested approach. It avoids unnecessary complexity while ensuring the default value is set correctly. From 356702b7ce7c3d1dc6b24414553b8aa400a585c7 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 13:02:25 -0400 Subject: [PATCH 11/17] fix: set the default correctly --- .../models/embeddings/EmbeddingCreateParams.kt | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt index dbc37f78c..c9ebb4fee 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt @@ -78,9 +78,6 @@ private constructor( * The format to return the embeddings in. Can be either `float` or * [`base64`](https://pypi.org/project/pybase64/). * - * Returns the encoding format that was set (either explicitly or via default) when this - * EmbeddingCreateParams instance was built. - * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type (e.g. if the * server responded with an unexpected value). 
*/ @@ -159,7 +156,6 @@ private constructor( private var body: Body.Builder = Body.builder() private var additionalHeaders: Headers.Builder = Headers.builder() private var additionalQueryParams: QueryParams.Builder = QueryParams.builder() - private var encodingFormat: JsonField = JsonField.of(EncodingFormat.BASE64) @JvmSynthetic internal fun from(embeddingCreateParams: EmbeddingCreateParams) = apply { @@ -263,7 +259,7 @@ private constructor( * [`base64`](https://pypi.org/project/pybase64/). */ fun encodingFormat(encodingFormat: EncodingFormat) = apply { - this.encodingFormat = JsonField.of(encodingFormat) + body.encodingFormat(encodingFormat) } /** @@ -274,7 +270,7 @@ private constructor( * supported value. */ fun encodingFormat(encodingFormat: JsonField) = apply { - this.encodingFormat = encodingFormat + body.encodingFormat(encodingFormat) } /** @@ -422,13 +418,12 @@ private constructor( * * @throws IllegalStateException if any required field is unset. */ - fun build(): EmbeddingCreateParams { - return EmbeddingCreateParams( + fun build(): EmbeddingCreateParams = + EmbeddingCreateParams( body.build(), additionalHeaders.build(), additionalQueryParams.build(), ) - } } fun _body(): Body = body @@ -586,7 +581,8 @@ private constructor( private var input: JsonField? = null private var model: JsonField? 
= null private var dimensions: JsonField = JsonMissing.of() - private var encodingFormat: JsonField = JsonMissing.of() + private var encodingFormat: JsonField = + JsonField.of(EncodingFormat.BASE64) private var user: JsonField = JsonMissing.of() private var additionalProperties: MutableMap = mutableMapOf() From d1ef92fb4020b7803c83c94d15a28968c9c1c745 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 13:08:04 -0400 Subject: [PATCH 12/17] fix: rename some things --- .../com/openai/models/embeddings/Embedding.kt | 8 ++-- .../models/embeddings/EmbeddingValue.kt | 38 +++++++++---------- .../EmbeddingValueIntegrationTest.kt | 36 +++++++++--------- 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt index 8ded8a538..62c5354bd 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt @@ -56,9 +56,7 @@ private constructor( fun embedding(): List = when { embeddingValue != null -> - embeddingValue - .getRequired("embedding") - .asFloatList() // Base64→Float auto conversion + embeddingValue.getRequired("embedding").asFloats() // Base64→Float auto conversion !embedding.isMissing() -> embedding.getRequired("embedding") // Original Float format data else -> throw OpenAIInvalidDataException("Embedding data is missing") @@ -75,7 +73,7 @@ private constructor( fun embeddingValue(): EmbeddingValue = when { embeddingValue != null -> embeddingValue.getRequired("embedding") - !embedding.isMissing() -> EmbeddingValue.ofFloatList(embedding.getRequired("embedding")) + !embedding.isMissing() -> EmbeddingValue.ofFloats(embedding.getRequired("embedding")) else -> throw OpenAIInvalidDataException("Embedding data is missing") } @@ -111,7 +109,7 @@ private constructor( when { embeddingValue != null -> 
embeddingValue !embedding.isMissing() -> - JsonField.of(EmbeddingValue.ofFloatList(embedding.getRequired("embedding"))) + JsonField.of(EmbeddingValue.ofFloats(embedding.getRequired("embedding"))) else -> JsonMissing.of() } diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index d10c304c4..3339a74ba 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -34,19 +34,19 @@ private constructor( ) { /** Returns the embedding as a list of floats, or null if this value represents base64 data. */ - fun floatList(): List? = floats + fun floats(): List? = floats /** * Returns the embedding as a base64-encoded string, or null if this value represents float * data. */ - fun base64String(): String? = base64 + fun base64(): String? = base64 /** Returns true if this value contains float list data. */ - fun isFloatList(): Boolean = floats != null + fun isFloats(): Boolean = floats != null /** Returns true if this value contains base64 string data. */ - fun isBase64String(): Boolean = base64 != null + fun isBase64(): Boolean = base64 != null /** * Returns the embedding data as a list of floats. @@ -61,10 +61,10 @@ private constructor( * * @return Decoded embedding data in List format */ - fun asFloatList(): List = + fun asFloats(): List = when { floats != null -> floats - base64 != null -> decodeBase64ToFloatList(base64) // Automatic Base64 decoding + base64 != null -> decodeBase64ToFloats(base64) // Automatic Base64 decoding else -> throw IllegalStateException("No valid embedding data") } @@ -72,10 +72,10 @@ private constructor( * Returns the embedding data as a base64-encoded string. If the data is a float list, it will * be encoded automatically. 
*/ - fun asBase64String(): String = + fun asBase64(): String = when { base64 != null -> base64 - floats != null -> encodeFloatListToBase64(floats) + floats != null -> encodeFloatsToBase64(floats) else -> throw IllegalStateException("No valid embedding data") } @@ -85,8 +85,8 @@ private constructor( /** Accepts a visitor that can handle both float list and base64 string cases. */ fun accept(visitor: Visitor): T = when { - floats != null -> visitor.visitFloatList(floats) - base64 != null -> visitor.visitBase64String(base64) + floats != null -> visitor.visitFloats(floats) + base64 != null -> visitor.visitBase64(base64) else -> visitor.unknown(_json) } @@ -101,9 +101,9 @@ private constructor( fun validate(): EmbeddingValue { accept( object : Visitor { - override fun visitFloatList(floatList: List) {} + override fun visitFloats(floats: List) {} - override fun visitBase64String(base64String: String) {} + override fun visitBase64(base64: String) {} } ) return this // Return this instance if validation succeeds @@ -142,19 +142,19 @@ private constructor( * @throws OpenAIInvalidDataException if validation fails */ @JvmStatic - fun ofFloatList(floats: List): EmbeddingValue { + fun ofFloats(floats: List): EmbeddingValue { return EmbeddingValue(floats = floats.toList()).apply { validate() } } /** * Creates an EmbeddingValue from a base64-encoded string. * - * @param base64String the base64-encoded string + * @param base64 the base64-encoded string * @return a new immutable EmbeddingValue instance * @throws OpenAIInvalidDataException if validation fails */ @JvmStatic - fun ofBase64String(base64: String): EmbeddingValue { + fun ofBase64(base64: String): EmbeddingValue { return EmbeddingValue(base64 = base64).apply { validate() } } @@ -162,7 +162,7 @@ private constructor( * Decodes a base64 string to a list of floats. Assumes the base64 string represents an * array of 32-bit IEEE 754 floats in little-endian format. 
*/ - private fun decodeBase64ToFloatList(base64: String): List { + private fun decodeBase64ToFloats(base64: String): List { return Base64.getDecoder().decode(base64).let { bytes -> ByteBuffer.wrap(bytes).asFloatBuffer().let { buffer -> (0 until buffer.remaining()).map { buffer.get() } @@ -174,7 +174,7 @@ private constructor( * Encodes a list of floats to a base64 string. Encodes the floats as an array of 32-bit * IEEE 754 floats in little-endian format. */ - private fun encodeFloatListToBase64(floats: List): String { + private fun encodeFloatsToBase64(floats: List): String { return ByteBuffer.allocate(floats.size * 4) .apply { floats.forEach { putFloat(it) } } .array() @@ -184,9 +184,9 @@ private constructor( /** Visitor interface for handling different types of embedding data. */ interface Visitor { - fun visitFloatList(floats: List): T + fun visitFloats(floats: List): T - fun visitBase64String(base64: String): T + fun visitBase64(base64: String): T fun unknown(json: JsonValue?): T { throw OpenAIInvalidDataException("Unknown EmbeddingValue: $json") diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt index 066c7b45b..fed8dfdb4 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt @@ -25,29 +25,29 @@ class EmbeddingValueIntegrationTest { val floats = listOf(1.0f, 2.0f, 3.0f, 4.0f) // Create EmbeddingValue from Float array - val embeddingFromFloat = EmbeddingValue.ofFloatList(floats) - assertThat(embeddingFromFloat.isFloatList()) + val embeddingFromFloat = EmbeddingValue.ofFloats(floats) + assertThat(embeddingFromFloat.isFloats()) .describedAs("EmbeddingValue created from Float array must be in Float format") .isTrue() - assertThat(embeddingFromFloat.asFloatList()) 
+ assertThat(embeddingFromFloat.asFloats()) .describedAs("Float array contents must match") .isEqualTo(floats) // Test conversion to Base64 - val base64 = embeddingFromFloat.asBase64String() + val base64 = embeddingFromFloat.asBase64() assertThat(base64).describedAs("Base64 string must not be empty").isNotEmpty() // Create EmbeddingValue from Base64 string - val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64) - assertThat(embeddingFromBase64.isBase64String()) + val embeddingFromBase64 = EmbeddingValue.ofBase64(base64) + assertThat(embeddingFromBase64.isBase64()) .describedAs("EmbeddingValue created from Base64 string must be in Base64 format") .isTrue() - assertThat(embeddingFromBase64.base64String()) + assertThat(embeddingFromBase64.base64()) .describedAs("Base64 string contents must match") .isEqualTo(base64) // Test auto-decode: Base64 → List - val decodedFloats = embeddingFromBase64.asFloatList() + val decodedFloats = embeddingFromBase64.asFloats() assertThat(decodedFloats) .describedAs("Decoded Float array must match the original array") .isEqualTo(floats) @@ -126,7 +126,7 @@ class EmbeddingValueIntegrationTest { fun testEmbeddingValueValidation() { // Test validation success with valid data val validFloats = listOf(1.0f, 2.0f, 3.0f) - val validEmbedding = EmbeddingValue.ofFloatList(validFloats) + val validEmbedding = EmbeddingValue.ofFloats(validFloats) assertThat(validEmbedding.validate()) .describedAs("Validation with valid data must succeed") @@ -143,15 +143,15 @@ class EmbeddingValueIntegrationTest { @DisplayName("Test EmbeddingValue visitor pattern") fun testEmbeddingValueVisitorPattern() { val floats = listOf(1.0f, 2.0f, 3.0f) - val embeddingFromFloat = EmbeddingValue.ofFloatList(floats) + val embeddingFromFloat = EmbeddingValue.ofFloats(floats) // Visitor for Float array case val floatResult = embeddingFromFloat.accept( object : EmbeddingValue.Visitor { - override fun visitFloatList(floats: List): String = "float_visited" + override fun 
visitFloats(floats: List): String = "float_visited" - override fun visitBase64String(base64: String): String = "base64_visited" + override fun visitBase64(base64: String): String = "base64_visited" override fun unknown(json: com.openai.core.JsonValue?): String = "unknown_visited" @@ -159,19 +159,19 @@ class EmbeddingValueIntegrationTest { ) assertThat(floatResult) - .describedAs("For Float array case, visitFloatList must be called") + .describedAs("For Float array case, visitFloats must be called") .isEqualTo("float_visited") // Visitor for Base64 case - val base64 = embeddingFromFloat.asBase64String() - val embeddingFromBase64 = EmbeddingValue.ofBase64String(base64) + val base64 = embeddingFromFloat.asBase64() + val embeddingFromBase64 = EmbeddingValue.ofBase64(base64) val base64Result = embeddingFromBase64.accept( object : EmbeddingValue.Visitor { - override fun visitFloatList(floats: List): String = "float_visited" + override fun visitFloats(floats: List): String = "float_visited" - override fun visitBase64String(base64String: String): String = "base64_visited" + override fun visitBase64(base64: String): String = "base64_visited" override fun unknown(json: com.openai.core.JsonValue?): String = "unknown_visited" @@ -179,7 +179,7 @@ class EmbeddingValueIntegrationTest { ) assertThat(base64Result) - .describedAs("For Base64 string case, visitBase64String must be called") + .describedAs("For Base64 string case, visitBase64 must be called") .isEqualTo("base64_visited") } } From 25d2e79db939080c39bd0a4e8dfd017c61672693 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 13:58:34 -0400 Subject: [PATCH 13/17] chore: EmbeddingValue refactor --- .../models/embeddings/EmbeddingValue.kt | 131 ++++++++---------- 1 file changed, 54 insertions(+), 77 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index 
3339a74ba..ee6237453 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -13,10 +13,12 @@ import com.openai.core.BaseDeserializer import com.openai.core.BaseSerializer import com.openai.core.JsonValue import com.openai.core.allMaxBy +import com.openai.core.toImmutable import com.openai.errors.OpenAIInvalidDataException import java.nio.ByteBuffer import java.util.Base64 import java.util.Objects +import java.util.Optional /** * Represents embedding data that can be either a list of floats or base64-encoded string. This @@ -33,16 +35,11 @@ private constructor( private val _json: JsonValue? = null, ) { - /** Returns the embedding as a list of floats, or null if this value represents base64 data. */ - fun floats(): List? = floats + fun floats(): Optional> = Optional.ofNullable(floats) - /** - * Returns the embedding as a base64-encoded string, or null if this value represents float - * data. - */ - fun base64(): String? = base64 + fun base64(): Optional = Optional.ofNullable(base64) - /** Returns true if this value contains float list data. */ + /** Returns true if this value contains a list of floats. */ fun isFloats(): Boolean = floats != null /** Returns true if this value contains base64 string data. */ @@ -51,38 +48,29 @@ private constructor( /** * Returns the embedding data as a list of floats. * - * **Important feature: Automatic Base64 decoding** This method is the core part of backward - * compatibility. When data is stored in Base64 format, it automatically decodes and returns - * List, so existing user code requires no changes. - * - * Processing flow: - * - Float format data → Return as-is - * - Base64 format data → Automatically decode and return as List - * - * @return Decoded embedding data in List format + * If this value represents base64 string data, then it's decoded into floats. 
*/ fun asFloats(): List = when { floats != null -> floats - base64 != null -> decodeBase64ToFloats(base64) // Automatic Base64 decoding - else -> throw IllegalStateException("No valid embedding data") + base64 != null -> decodeBase64ToFloats(base64) + else -> throw IllegalStateException("Invalid EmbeddingValue") } /** - * Returns the embedding data as a base64-encoded string. If the data is a float list, it will - * be encoded automatically. + * Returns the embedding data as a base64 string. + * + * If this value represents a list of floats, then it's encoded as a base64 string. */ fun asBase64(): String = when { base64 != null -> base64 - floats != null -> encodeFloatsToBase64(floats) - else -> throw IllegalStateException("No valid embedding data") + floats != null -> encodeFloatsAsBase64(floats) + else -> throw IllegalStateException("Invalid EmbeddingValue") } - /** Returns the raw JSON value for debugging purposes. */ fun _json(): JsonValue? = _json - /** Accepts a visitor that can handle both float list and base64 string cases. */ fun accept(visitor: Visitor): T = when { floats != null -> visitor.visitFloats(floats) @@ -90,15 +78,7 @@ private constructor( else -> visitor.unknown(_json) } - /** - * Validates the embedding data and returns a new validated instance. This method is immutable - - * it returns a new instance if validation is successful, or throws an exception if validation - * fails. - * - * @return this instance if validation succeeds - * @throws OpenAIInvalidDataException if validation fails - */ - fun validate(): EmbeddingValue { + fun validate() = apply { accept( object : Visitor { override fun visitFloats(floats: List) {} @@ -106,7 +86,6 @@ private constructor( override fun visitBase64(base64: String) {} } ) - return this // Return this instance if validation succeeds } fun isValid(): Boolean = @@ -117,12 +96,28 @@ private constructor( false } + /** + * Returns a score indicating how many valid values are contained in this object.
+ * + * Used for best match union deserialization. + */ + @JvmSynthetic + internal fun validity(): Int = + when { + floats != null -> floats.size + base64 != null -> 1 + else -> 0 + } + override fun equals(other: Any?): Boolean { - if (this === other) return true - return other is EmbeddingValue && floats == other.floats && base64 == other.base64 + if (this === other) { + return true + } + + return /* spotless:off */ other is EmbeddingValue && floats == other.floats && base64 == other.base64 && _json == other._json /* spotless:on */ } - override fun hashCode(): Int = Objects.hash(floats, base64) + override fun hashCode(): Int = /* spotless:off */ Objects.hash(floats, base64, _json) /* spotless:on */ override fun toString(): String = when { @@ -133,61 +128,52 @@ private constructor( } companion object { - /** - * Creates an EmbeddingValue from a list of floats. The input list is defensively copied to - * ensure immutability. - * - * @param floats the list of float values (will be copied) - * @return a new immutable EmbeddingValue instance - * @throws OpenAIInvalidDataException if validation fails - */ - @JvmStatic - fun ofFloats(floats: List): EmbeddingValue { - return EmbeddingValue(floats = floats.toList()).apply { validate() } - } - /** - * Creates an EmbeddingValue from a base64-encoded string. - * - * @param base64 the base64-encoded string - * @return a new immutable EmbeddingValue instance - * @throws OpenAIInvalidDataException if validation fails - */ - @JvmStatic - fun ofBase64(base64: String): EmbeddingValue { - return EmbeddingValue(base64 = base64).apply { validate() } - } + @JvmStatic fun ofFloats(floats: List) = EmbeddingValue(floats = floats.toImmutable()) + + @JvmStatic fun ofBase64(base64: String) = EmbeddingValue(base64 = base64) /** * Decodes a base64 string to a list of floats. Assumes the base64 string represents an * array of 32-bit IEEE 754 floats in little-endian format. 
*/ - private fun decodeBase64ToFloats(base64: String): List { - return Base64.getDecoder().decode(base64).let { bytes -> + private fun decodeBase64ToFloats(base64: String): List = + Base64.getDecoder().decode(base64).let { bytes -> ByteBuffer.wrap(bytes).asFloatBuffer().let { buffer -> (0 until buffer.remaining()).map { buffer.get() } } } - } /** * Encodes a list of floats to a base64 string. Encodes the floats as an array of 32-bit * IEEE 754 floats in little-endian format. */ - private fun encodeFloatsToBase64(floats: List): String { - return ByteBuffer.allocate(floats.size * 4) + private fun encodeFloatsAsBase64(floats: List): String = + ByteBuffer.allocate(floats.size * 4) .apply { floats.forEach { putFloat(it) } } .array() .let { bytes -> Base64.getEncoder().encodeToString(bytes) } - } } - /** Visitor interface for handling different types of embedding data. */ + /** + * An interface that defines how to map each variant of [EmbeddingValue] to a value of type [T]. + */ interface Visitor { + fun visitFloats(floats: List): T fun visitBase64(base64: String): T + /** + * Maps an unknown variant of [EmbeddingValue] to a value of type [T]. + * + * An instance of [EmbeddingValue] can contain an unknown variant if it was deserialized + * from data that doesn't match any known variant. For example, if the SDK is on an older + * version than the API, then the API may respond with new variants that the SDK is unaware + * of. + * + * @throws OpenAIInvalidDataException in the default implementation. + */ fun unknown(json: JsonValue?): T { throw OpenAIInvalidDataException("Unknown EmbeddingValue: $json") } @@ -232,13 +218,4 @@ private constructor( } } } - - /** Returns a score indicating how many valid values are contained in this object. 
*/ - @JvmSynthetic - internal fun validity(): Int = - when { - floats != null -> floats.size - base64 != null -> 1 - else -> 0 - } } From 40faeaf14355c4610733b2e18fc1306c0ff5b2f8 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 14:28:48 -0400 Subject: [PATCH 14/17] refactor: embedding data model --- .../com/openai/models/embeddings/Embedding.kt | 134 ++++++------------ .../models/embeddings/EmbeddingValue.kt | 4 +- .../openai/models/embeddings/EmbeddingTest.kt | 26 ++++ 3 files changed, 74 insertions(+), 90 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt index 62c5354bd..3a786e287 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt @@ -12,7 +12,6 @@ import com.openai.core.JsonMissing import com.openai.core.JsonValue import com.openai.core.checkKnown import com.openai.core.checkRequired -import com.openai.core.toImmutable import com.openai.errors.OpenAIInvalidDataException import java.util.Collections import java.util.Objects @@ -21,8 +20,7 @@ import kotlin.jvm.optionals.getOrNull /** Represents an embedding vector returned by embedding endpoint. 
*/ class Embedding private constructor( - private val embedding: JsonField>, - private val embeddingValue: JsonField?, + private val embedding: JsonField, private val index: JsonField, private val object_: JsonValue, private val additionalProperties: MutableMap, @@ -35,47 +33,25 @@ private constructor( embedding: JsonField = JsonMissing.of(), @JsonProperty("index") @ExcludeMissing index: JsonField = JsonMissing.of(), @JsonProperty("object") @ExcludeMissing object_: JsonValue = JsonMissing.of(), - ) : this( - JsonMissing.of(), // Legacy embedding field will be populated from embeddingValue - embedding, - index, - object_, - mutableMapOf(), - ) + ) : this(embedding, index, object_, mutableMapOf()) /** * The embedding vector, which is a list of floats. The length of vector depends on the model as * listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). * - * Important: When Base64 data is received, it is automatically decoded and returned as - * List - * * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is * unexpectedly missing or null (e.g. if the server responded with an unexpected value). */ - fun embedding(): List = - when { - embeddingValue != null -> - embeddingValue.getRequired("embedding").asFloats() // Base64→Float auto conversion - !embedding.isMissing() -> - embedding.getRequired("embedding") // Original Float format data - else -> throw OpenAIInvalidDataException("Embedding data is missing") - } + fun embedding(): List = embeddingValue().asFloats() /** * The embedding data in its original format (either float list or base64 string). This method * provides efficient access to the embedding data without unnecessary conversions. * - * @return EmbeddingValue containing the embedding data in its original format * @throws OpenAIInvalidDataException if the JSON field has an unexpected type or is * unexpectedly missing or null (e.g. if the server responded with an unexpected value). 
*/ - fun embeddingValue(): EmbeddingValue = - when { - embeddingValue != null -> embeddingValue.getRequired("embedding") - !embedding.isMissing() -> EmbeddingValue.ofFloats(embedding.getRequired("embedding")) - else -> throw OpenAIInvalidDataException("Embedding data is missing") - } + fun embeddingValue(): EmbeddingValue = embedding.getRequired("embedding") /** * The index of the embedding in the list of embeddings. @@ -103,15 +79,16 @@ private constructor( * * Unlike [embedding], this method doesn't throw if the JSON field has an unexpected type. */ + fun _embedding(): JsonField> = embedding.map { it.asFloats() } + + /** + * Returns the raw JSON value of [embedding]. + * + * Unlike [embeddingValue], this method doesn't throw if the JSON field has an unexpected type. + */ @JsonProperty("embedding") @ExcludeMissing - fun _embedding(): JsonField = - when { - embeddingValue != null -> embeddingValue - !embedding.isMissing() -> - JsonField.of(EmbeddingValue.ofFloats(embedding.getRequired("embedding"))) - else -> JsonMissing.of() - } + fun _embeddingValue(): JsonField = embedding /** * Returns the raw JSON value of [index]. @@ -149,30 +126,38 @@ private constructor( /** A builder for [Embedding]. */ class Builder internal constructor() { - private var embedding: JsonField>? = null + private var embeddingFloats: MutableList? = null + private var embedding: JsonField? = null private var index: JsonField? 
= null private var object_: JsonValue = JsonValue.from("embedding") private var additionalProperties: MutableMap = mutableMapOf() @JvmSynthetic internal fun from(embedding: Embedding) = apply { - try { - this.embedding = JsonField.of(embedding.embedding().toMutableList()) - } catch (e: Exception) { - // Fallback to field-level copying if embedding() method fails - this.embedding = embedding.embedding.map { it.toMutableList() } - } + this.embedding = embedding.embedding index = embedding.index object_ = embedding.object_ additionalProperties = embedding.additionalProperties.toMutableMap() } + /** + * The embedding vector. The length of vector depends on the model as listed in the + * [embedding guide](https://platform.openai.com/docs/guides/embeddings). + */ + fun embedding(embedding: EmbeddingValue) = embedding(JsonField.of(embedding)) + /** * The embedding vector, which is a list of floats. The length of vector depends on the * model as listed in the * [embedding guide](https://platform.openai.com/docs/guides/embeddings). */ - fun embedding(embedding: List) = embedding(JsonField.of(embedding)) + fun embedding(floats: List) = embedding(EmbeddingValue.ofFloats(floats)) + + /** + * The embedding vector, which is a base64 string. The length of vector depends on the model + * as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings). + */ + fun embedding(base64: String) = embedding(EmbeddingValue.ofBase64(base64)) /** * Sets [Builder.embedding] to an arbitrary JSON value. @@ -181,8 +166,9 @@ private constructor( * instead. This method is primarily for setting the field to an undocumented or not yet * supported value. 
*/ - fun embedding(embedding: JsonField>) = apply { - this.embedding = embedding.map { it.toMutableList() } + fun embedding(embedding: JsonField) = apply { + embeddingFloats = null + this.embedding = embedding } /** @@ -191,10 +177,12 @@ private constructor( * @throws IllegalStateException if the field was previously set to a non-list. */ fun addEmbedding(embedding: Float) = apply { - this.embedding = - (this.embedding ?: JsonField.of(mutableListOf())).also { - checkKnown("embedding", it).add(embedding) - } + embeddingFloats = + (this.embedding?.let { checkKnown("embedding", it) }?.asFloats()?.toMutableList() + ?: embeddingFloats + ?: mutableListOf()) + .apply { add(embedding) } + this.embedding = null } /** The index of the embedding in the list of embeddings. */ @@ -256,8 +244,10 @@ private constructor( */ fun build(): Embedding = Embedding( - checkRequired("embedding", embedding).map { it.toImmutable() }, - null, // embeddingValue - will be null for builder-created instances + checkRequired( + "embedding", + embedding ?: embeddingFloats?.let { JsonField.of(EmbeddingValue.ofFloats(it)) }, + ), checkRequired("index", index), object_, additionalProperties.toMutableMap(), @@ -271,7 +261,7 @@ private constructor( return@apply } - embedding() // This will call the method that returns List + embeddingValue().validate() index() _object_().let { if (it != JsonValue.from("embedding")) { @@ -296,11 +286,7 @@ private constructor( */ @JvmSynthetic internal fun validity(): Int = - when { - embeddingValue != null -> embeddingValue.asKnown().getOrNull()?.validity() ?: 0 - !embedding.isMissing() -> embedding.asKnown().getOrNull()?.size ?: 0 - else -> 0 - } + + (embedding.asKnown().getOrNull()?.validity() ?: 0) + (if (index.asKnown().isPresent) 1 else 0) + object_.let { if (it == JsonValue.from("embedding")) 1 else 0 } @@ -309,43 +295,15 @@ private constructor( return true } - if (other !is Embedding) { - return false - } - - return try { - embedding() == other.embedding() && - 
index == other.index && - object_ == other.object_ && - additionalProperties == other.additionalProperties - } catch (e: Exception) { - // Fallback to field-level comparison if embedding() methods fail - embedding == other.embedding && - embeddingValue == other.embeddingValue && - index == other.index && - object_ == other.object_ && - additionalProperties == other.additionalProperties - } + return /* spotless:off */ other is Embedding && embedding == other.embedding && index == other.index && object_ == other.object_ && additionalProperties == other.additionalProperties /* spotless:on */ } /* spotless:off */ - private val hashCode: Int by lazy { - try { - Objects.hash(embedding(), index, object_, additionalProperties) - } catch (e: Exception) { - // Fallback to field-level hashing if embedding() method fails - Objects.hash(embedding, embeddingValue, index, object_, additionalProperties) - } - } + private val hashCode: Int by lazy { Objects.hash(embedding, index, object_, additionalProperties) } /* spotless:on */ override fun hashCode(): Int = hashCode override fun toString() = - when { - embeddingValue != null -> - "Embedding{embedding=${try { embedding() } catch (e: Exception) { "[]" }}, index=$index, object_=$object_, additionalProperties=$additionalProperties}" - else -> - "Embedding{embedding=${embedding.asKnown().getOrNull() ?: emptyList()}, index=$index, object_=$object_, additionalProperties=$additionalProperties}" - } + "Embedding{embedding=$embedding, index=$index, object_=$object_, additionalProperties=$additionalProperties}" } diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index ee6237453..7bad18699 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -114,10 +114,10 @@ private constructor( return 
true } - return /* spotless:off */ other is EmbeddingValue && floats == other.floats && base64 == other.base64 && _json == other._json /* spotless:on */ + return /* spotless:off */ other is EmbeddingValue && floats == other.floats && base64 == other.base64 /* spotless:on */ } - override fun hashCode(): Int = /* spotless:off */ Objects.hash(floats, base64, _json) /* spotless:on */ + override fun hashCode(): Int = /* spotless:off */ Objects.hash(floats, base64) /* spotless:on */ override fun toString(): String = when { diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt index 393c05cd4..57311de18 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt @@ -17,6 +17,32 @@ internal class EmbeddingTest { assertThat(embedding.index()).isEqualTo(0L) } + @Test + fun create_setThenAdd() { + val embedding = + Embedding.builder() + .embedding(EmbeddingValue.ofFloats(listOf(1.0f, 2.0f))) + .addEmbedding(3.0f) + .index(0L) + .build() + + assertThat(embedding.embedding()).containsExactly(1.0f, 2.0f, 3.0f) + assertThat(embedding.index()).isEqualTo(0L) + } + + @Test + fun create_addThenSet() { + val embedding = + Embedding.builder() + .addEmbedding(3.0f) + .embedding(EmbeddingValue.ofFloats(listOf(1.0f, 2.0f))) + .index(0L) + .build() + + assertThat(embedding.embedding()).containsExactly(1.0f, 2.0f) + assertThat(embedding.index()).isEqualTo(0L) + } + @Test fun roundtrip() { val jsonMapper = jsonMapper() From 7eb0479756c7dde768069cc6bc444d99fd70af3d Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 14:30:40 -0400 Subject: [PATCH 15/17] chore: delete test with no asserts --- .../models/embeddings/EmbeddingStepTest.kt | 62 ------------------- 1 file changed, 62 deletions(-) delete mode 100644 
openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt deleted file mode 100644 index e2f3ab856..000000000 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingStepTest.kt +++ /dev/null @@ -1,62 +0,0 @@ -package com.openai.models.embeddings - -import org.junit.jupiter.api.DisplayName -import org.junit.jupiter.api.Test - -/** Step-by-step trace test */ -@DisplayName("Step Test") -class EmbeddingStepTest { - - @Test - @DisplayName("Step 1: Check initial state") - fun step1_checkInitialState() { - println("===== Step 1: Check initial state =====") - val params = - EmbeddingCreateParams.builder().input("test").model("text-embedding-ada-002").build() - val defaultFormat = params.encodingFormat().orElse(null) - println("Default encodingFormat in params = $defaultFormat") - println("EncodingFormat.BASE64 = ${EmbeddingCreateParams.EncodingFormat.BASE64}") - println("EncodingFormat.FLOAT = ${EmbeddingCreateParams.EncodingFormat.FLOAT}") - println( - "Is default BASE64? 
${defaultFormat == EmbeddingCreateParams.EncodingFormat.BASE64}" - ) - } - - @Test - @DisplayName("Step 2: Check builder creation") - fun step2_checkBuilder() { - println("===== Step 2: Check builder creation =====") - val builder = EmbeddingCreateParams.builder().input("test").model("text-embedding-ada-002") - println("Builder created") - - // Check state before build - println("About to build...") - val params = builder.build() - println("Build completed") - - val encodingFormat = params.encodingFormat() - println("encodingFormat() result: $encodingFormat") - println("isPresent: ${encodingFormat.isPresent}") - if (encodingFormat.isPresent) { - println("Value: ${encodingFormat.get()}") - } - } - - @Test - @DisplayName("Step 3: Explicit Base64 setting") - fun step3_explicitBase64() { - println("===== Step 3: Explicit Base64 setting =====") - val params = - EmbeddingCreateParams.builder() - .input("test") - .model("text-embedding-ada-002") - .encodingFormat(EmbeddingCreateParams.EncodingFormat.BASE64) - .build() - - val encodingFormat = params.encodingFormat() - println("After explicit Base64 setting: $encodingFormat") - if (encodingFormat.isPresent) { - println("Value: ${encodingFormat.get()}") - } - } -} From 36fc22eb907b91f324afbb7085e7d8cf2a5dac68 Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 14:35:34 -0400 Subject: [PATCH 16/17] chore: test changes --- .../EmbeddingValueIntegrationTest.kt | 185 ------------------ .../models/embeddings/EmbeddingValueTest.kt | 35 ++++ 2 files changed, 35 insertions(+), 185 deletions(-) delete mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt create mode 100644 openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt deleted 
file mode 100644 index fed8dfdb4..000000000 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueIntegrationTest.kt +++ /dev/null @@ -1,185 +0,0 @@ -package com.openai.models.embeddings - -import org.assertj.core.api.Assertions.assertThat -import org.junit.jupiter.api.DisplayName -import org.junit.jupiter.api.Test - -/** - * Integration test to verify Base64 default functionality and backward compatibility of - * EmbeddingValue. Ensures that both existing List usage and new Base64 format work - * correctly. - */ -@DisplayName("EmbeddingValue Integration Test") -class EmbeddingValueIntegrationTest { - - /** - * Test EmbeddingValue creation and format conversion functionality. - * - Creating EmbeddingValue from Float array - * - Converting to Base64 string - * - Creating EmbeddingValue from Base64 string - * - Auto-decode functionality (Base64 → List) - */ - @Test - @DisplayName("Test EmbeddingValue creation and format conversion") - fun testEmbeddingValueCreationAndConversion() { - val floats = listOf(1.0f, 2.0f, 3.0f, 4.0f) - - // Create EmbeddingValue from Float array - val embeddingFromFloat = EmbeddingValue.ofFloats(floats) - assertThat(embeddingFromFloat.isFloats()) - .describedAs("EmbeddingValue created from Float array must be in Float format") - .isTrue() - assertThat(embeddingFromFloat.asFloats()) - .describedAs("Float array contents must match") - .isEqualTo(floats) - - // Test conversion to Base64 - val base64 = embeddingFromFloat.asBase64() - assertThat(base64).describedAs("Base64 string must not be empty").isNotEmpty() - - // Create EmbeddingValue from Base64 string - val embeddingFromBase64 = EmbeddingValue.ofBase64(base64) - assertThat(embeddingFromBase64.isBase64()) - .describedAs("EmbeddingValue created from Base64 string must be in Base64 format") - .isTrue() - assertThat(embeddingFromBase64.base64()) - .describedAs("Base64 string contents must match") - .isEqualTo(base64) - - // Test auto-decode: Base64 → List - val 
decodedFloats = embeddingFromBase64.asFloats() - assertThat(decodedFloats) - .describedAs("Decoded Float array must match the original array") - .isEqualTo(floats) - } - - /** - * Test explicit Base64 encoding specification in EmbeddingCreateParams.Builder. Confirm that - * Base64 format can be explicitly specified using the encodingFormat() method. - */ - @Test - @DisplayName("Test explicit Base64 encoding specification in EmbeddingCreateParams") - fun testEmbeddingCreateParamsBuilderWithBase64Encoding() { - val params = - EmbeddingCreateParams.builder() - .input("test input") - .model("text-embedding-ada-002") - .encodingFormat(encodingFormat = EmbeddingCreateParams.EncodingFormat.BASE64) - .build() - - assertThat(params.encodingFormat()).describedAs("Encoding format must be set").isPresent() - assertThat(params.encodingFormat().get()) - .describedAs("Explicitly specified encoding format must be Base64") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) - } - - /** - * Test default behavior of EmbeddingCreateParams. Confirm that Base64 is used by default when - * encoding format is not explicitly specified. - */ - @Test - @DisplayName("Test EmbeddingCreateParams default behavior") - fun testEmbeddingCreateParamsDefaultBehavior() { - val params = - EmbeddingCreateParams.builder() - .input("test input") - .model("text-embedding-ada-002") - .build() // Do not explicitly specify encoding format - - assertThat(params.encodingFormat()) - .describedAs("Encoding format must be set by default") - .isPresent() - assertThat(params.encodingFormat().get()) - .describedAs("Default encoding format must be Base64") - .isEqualTo(EmbeddingCreateParams.EncodingFormat.BASE64) - } - - /** - * Test explicit Float format specification for backward compatibility. Confirm that the - * traditional Float format can be explicitly specified using the encodingFormat() method. 
- */ - @Test - @DisplayName("Test explicit Float format specification for backward compatibility") - fun testEmbeddingCreateParamsFloatCompatibility() { - val params = - EmbeddingCreateParams.builder() - .input("test input") - .model("text-embedding-ada-002") - .encodingFormat(encodingFormat = EmbeddingCreateParams.EncodingFormat.FLOAT) - .build() - - assertThat(params.encodingFormat()).describedAs("Encoding format must be set").isPresent() - assertThat(params.encodingFormat().get()) - .describedAs( - "Explicitly specified encoding format for backward compatibility must be Float" - ) - .isEqualTo(EmbeddingCreateParams.EncodingFormat.FLOAT) - } - - /** - * Test EmbeddingValue validation functionality. - * - Validation failure with empty Float array - * - Validation failure with invalid Base64 string - */ - @Test - @DisplayName("Test EmbeddingValue validation functionality") - fun testEmbeddingValueValidation() { - // Test validation success with valid data - val validFloats = listOf(1.0f, 2.0f, 3.0f) - val validEmbedding = EmbeddingValue.ofFloats(validFloats) - - assertThat(validEmbedding.validate()) - .describedAs("Validation with valid data must succeed") - .isNotNull() - .isEqualTo(validEmbedding) - } - - /** - * Test EmbeddingValue visitor pattern implementation. 
- * - Visitor call for Float array case - * - Visitor call for Base64 string case - */ - @Test - @DisplayName("Test EmbeddingValue visitor pattern") - fun testEmbeddingValueVisitorPattern() { - val floats = listOf(1.0f, 2.0f, 3.0f) - val embeddingFromFloat = EmbeddingValue.ofFloats(floats) - - // Visitor for Float array case - val floatResult = - embeddingFromFloat.accept( - object : EmbeddingValue.Visitor { - override fun visitFloats(floats: List): String = "float_visited" - - override fun visitBase64(base64: String): String = "base64_visited" - - override fun unknown(json: com.openai.core.JsonValue?): String = - "unknown_visited" - } - ) - - assertThat(floatResult) - .describedAs("For Float array case, visitFloats must be called") - .isEqualTo("float_visited") - - // Visitor for Base64 case - val base64 = embeddingFromFloat.asBase64() - val embeddingFromBase64 = EmbeddingValue.ofBase64(base64) - - val base64Result = - embeddingFromBase64.accept( - object : EmbeddingValue.Visitor { - override fun visitFloats(floats: List): String = "float_visited" - - override fun visitBase64(base64: String): String = "base64_visited" - - override fun unknown(json: com.openai.core.JsonValue?): String = - "unknown_visited" - } - ) - - assertThat(base64Result) - .describedAs("For Base64 string case, visitBase64 must be called") - .isEqualTo("base64_visited") - } -} diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt new file mode 100644 index 000000000..2d60358c8 --- /dev/null +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt @@ -0,0 +1,35 @@ +package com.openai.models.embeddings + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.Test + +internal class EmbeddingValueTest { + + @Test + fun ofFloats() { + val floats = listOf(1.0f, 2.0f, 3.0f, 4.0f) + + val embeddingValue = 
EmbeddingValue.ofFloats(floats) + + assertThat(embeddingValue.isFloats()).isTrue() + assertThat(embeddingValue.isBase64()).isFalse() + assertThat(embeddingValue.floats()).hasValue(floats) + assertThat(embeddingValue.base64()).isEmpty + assertThat(embeddingValue.asFloats()).isEqualTo(floats) + assertThat(embeddingValue.asBase64()).isEqualTo("P4AAAEAAAABAQAAAQIAAAA==") + } + + @Test + fun ofBase64() { + val base64 = "P4AAAEAAAABAQAAAQIAAAA==" + + val embeddingValue = EmbeddingValue.ofBase64(base64) + + assertThat(embeddingValue.isFloats()).isFalse() + assertThat(embeddingValue.isBase64()).isTrue() + assertThat(embeddingValue.floats()).isEmpty + assertThat(embeddingValue.base64()).hasValue(base64) + assertThat(embeddingValue.asFloats()).containsExactly(1.0f, 2.0f, 3.0f, 4.0f) + assertThat(embeddingValue.asBase64()).isEqualTo(base64) + } +} From d70a5da7e348815633d33e36bf70556e4ab3a50d Mon Sep 17 00:00:00 2001 From: Tomer Aberbach Date: Wed, 23 Jul 2025 14:51:56 -0400 Subject: [PATCH 17/17] fix: little-endian --- .../models/embeddings/EmbeddingValue.kt | 22 +++++++++++-------- .../models/embeddings/EmbeddingValueTest.kt | 4 ++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt index 7bad18699..1bd26ded0 100644 --- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt +++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt @@ -16,6 +16,7 @@ import com.openai.core.allMaxBy import com.openai.core.toImmutable import com.openai.errors.OpenAIInvalidDataException import java.nio.ByteBuffer +import java.nio.ByteOrder import java.util.Base64 import java.util.Objects import java.util.Optional @@ -137,22 +138,25 @@ private constructor( * Decodes a base64 string to a list of floats. 
Assumes the base64 string represents an * array of 32-bit IEEE 754 floats in little-endian format. */ - private fun decodeBase64ToFloats(base64: String): List = - Base64.getDecoder().decode(base64).let { bytes -> - ByteBuffer.wrap(bytes).asFloatBuffer().let { buffer -> - (0 until buffer.remaining()).map { buffer.get() } + private fun decodeBase64ToFloats(base64: String): List { + val bytes = Base64.getDecoder().decode(base64) + val floats = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer() + return buildList(floats.remaining()) { + while (floats.hasRemaining()) { + add(floats.get()) } } + } /** * Encodes a list of floats to a base64 string. Encodes the floats as an array of 32-bit * IEEE 754 floats in little-endian format. */ - private fun encodeFloatsAsBase64(floats: List): String = - ByteBuffer.allocate(floats.size * 4) - .apply { floats.forEach { putFloat(it) } } - .array() - .let { bytes -> Base64.getEncoder().encodeToString(bytes) } + private fun encodeFloatsAsBase64(floats: List): String { + val buffer = ByteBuffer.allocate(floats.size * 4).order(ByteOrder.LITTLE_ENDIAN) + floats.forEach { buffer.putFloat(it) } + return Base64.getEncoder().encodeToString(buffer.array()) + } } /** diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt index 2d60358c8..663237712 100644 --- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt +++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingValueTest.kt @@ -16,12 +16,12 @@ internal class EmbeddingValueTest { assertThat(embeddingValue.floats()).hasValue(floats) assertThat(embeddingValue.base64()).isEmpty assertThat(embeddingValue.asFloats()).isEqualTo(floats) - assertThat(embeddingValue.asBase64()).isEqualTo("P4AAAEAAAABAQAAAQIAAAA==") + assertThat(embeddingValue.asBase64()).isEqualTo("AACAPwAAAEAAAEBAAACAQA==") } @Test fun 
ofBase64() { - val base64 = "P4AAAEAAAABAQAAAQIAAAA==" + val base64 = "AACAPwAAAEAAAEBAAACAQA==" val embeddingValue = EmbeddingValue.ofBase64(base64)