diff --git a/docs/changelog/125599.yaml b/docs/changelog/125599.yaml new file mode 100644 index 0000000000000..e0057f50dd937 --- /dev/null +++ b/docs/changelog/125599.yaml @@ -0,0 +1,6 @@ +pr: 125599 +summary: Allow zero for `rescore_vector.oversample` to indicate by-passing oversample + and rescoring +area: Vector Search +type: enhancement +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml index 9747644a5ba6c..c9c9ef8daf2c9 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_bbq_hnsw.yml @@ -340,3 +340,260 @@ setup: - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector updateable and settable to 0": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + + - do: + indices.create: + index: bbq_rescore_0_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.create: + index: bbq_rescore_update_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 1 + + - do: + indices.put_mapping: + index: bbq_rescore_update_hnsw + body: + properties: + vector: + type: dense_vector + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.get_mapping: + index: bbq_rescore_update_hnsw + + - match: { .bbq_rescore_update_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 0 } +--- +"Test index configured rescore vector score consistency": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: bbq_rescore_zero_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + dims: 64 + index: true + similarity: max_inner_product + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 0 + + - do: + bulk: + index: bbq_rescore_zero_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] } + { "index": {"_id": "2"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "index": {"_id": "3"}} + { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: bbq_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: raw_score0 } + - set: { hits.hits.1._score: raw_score1 } + - set: { hits.hits.2._score: raw_score2 } + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: bbq_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 2 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: override_score0 } + - set: { hits.hits.1._score: override_score1 } + - set: { hits.hits.2._score: override_score2 } + + - do: + indices.put_mapping: + index: bbq_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + dims: 64 + index: true + similarity: max_inner_product + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 2 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: bbq_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: default_rescore0 } + - set: { hits.hits.1._score: default_rescore1 } + - set: { hits.hits.2._score: default_rescore2 } + + - do: + indices.put_mapping: + index: bbq_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + dims: 64 + index: true + similarity: max_inner_product + index_options: + type: bbq_hnsw + rescore_vector: + oversample: 0 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: bbq_rescore_zero_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $override_score0 } + - match: { hits.hits.0._score: $default_rescore0 } + - match: { hits.hits.1._score: $override_score1 } + - match: { hits.hits.1._score: $default_rescore1 } + - match: { hits.hits.2._score: $override_score2 } + - match: { hits.hits.2._score: $default_rescore2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: bbq_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, + 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, + 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, + -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , + -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, + -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, + -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, + -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + k: 3 + num_candidates: 3 + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $raw_score0 } + - match: { hits.hits.1._score: $raw_score1 } + - match: { hits.hits.2._score: $raw_score2 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized.yml index fb45521cb47c6..a1ed9a2eca11b 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_byte_quantized.yml @@ -629,8 +629,7 @@ setup: properties: vector: type: dense_vector - dims: 64 - index: true + dims: 4 similarity: max_inner_product index_options: type: int8_hnsw @@ -643,12 +642,11 @@ setup: refresh: true body: | { "index": {"_id": "1"}} - { "vector": [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] } + { "vector": [230.0, 300.33, -34.8988, 15.555] } { "index": {"_id": "2"}} - { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "vector": [-0.5, 100.0, -13, 14.8] } { "index": {"_id": "3"}} - { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } - + { "vector": [0.5, 111.3, -13.0, 14.8] } - do: headers: Content-Type: application/json @@ -658,14 +656,7 @@ setup: body: knn: field: vector - query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, - 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, - 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, - -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , - -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, - -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, - -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, - -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + query_vector: [-0.5, 90.0, -10, 14.8] k: 3 num_candidates: 3 @@ -679,6 +670,7 @@ setup: Content-Type: application/json search: rest_total_hits_as_int: true + index: int8_rescore_hnsw body: query: script_score: @@ -686,17 +678,228 @@ setup: script: source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" params: - query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, - 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, - 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, - -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , - -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, - -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, - -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, - -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + query_vector: [-0.5, 90.0, -10, 14.8] # Compare scores as hit IDs may change depending on how things are distributed - match: { hits.total: 3 } - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector updateable and settable to 0": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + + - do: + indices.create: + index: int8_rescore_0_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.create: + index: int8_rescore_update_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: int8_hnsw + rescore_vector: + oversample: 1 + + - do: + indices.put_mapping: + index: int8_rescore_update_hnsw + body: + properties: + vector: + type: dense_vector + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.get_mapping: + index: int8_rescore_update_hnsw + + - match: { .int8_rescore_update_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 0 } +--- +"Test index configured rescore vector score consistency": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int8_rescore_zero_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + bulk: + index: int8_rescore_zero_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: raw_score0 } + - set: { hits.hits.1._score: raw_score1 } + - set: { hits.hits.2._score: raw_score2 } + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 2 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: override_score0 } + - set: { hits.hits.1._score: override_score1 } + - set: { hits.hits.2._score: override_score2 } + + - do: + indices.put_mapping: + index: int8_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 2 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: default_rescore0 } + - set: { hits.hits.1._score: default_rescore1 } + - set: { hits.hits.2._score: default_rescore2 } + + - do: + indices.put_mapping: + index: int8_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int8_hnsw + rescore_vector: + oversample: 0 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $override_score0 } + - match: { hits.hits.0._score: $default_rescore0 } + - match: { hits.hits.1._score: $override_score1 } + - match: { hits.hits.1._score: $default_rescore1 } + - match: { hits.hits.2._score: $override_score2 } + - match: { hits.hits.2._score: $default_rescore2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int8_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $raw_score0 } + - match: { hits.hits.1._score: $raw_score1 } + - match: { hits.hits.2._score: $raw_score2 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml index c2fe78ddbd532..c8511c3160c83 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml @@ -663,8 +663,7 @@ setup: properties: vector: type: dense_vector - dims: 64 - index: true + dims: 4 similarity: max_inner_product index_options: type: int4_hnsw @@ -677,11 +676,11 @@ setup: refresh: true body: | { "index": {"_id": "1"}} - { "vector": [0.077, 0.32 , -0.205, 0.63 , 0.032, 0.201, 0.167, -0.313, 0.176, 0.531, -0.375, 0.334, -0.046, 0.078, -0.349, 0.272, 0.307, -0.083, 0.504, 0.255, -0.404, 0.289, -0.226, -0.132, -0.216, 0.49 , 0.039, 0.507, -0.307, 0.107, 0.09 , -0.265, -0.285, 0.336, -0.272, 0.369, -0.282, 0.086, -0.132, 0.475, -0.224, 0.203, 0.439, 0.064, 0.246, -0.396, 0.297, 0.242, -0.028, 0.321, -0.022, -0.009, -0.001 , 0.031, -0.533, 0.45, -0.683, 1.331, 0.194, -0.157, -0.1 , -0.279, -0.098, -0.176] } + { "vector": [230.0, 300.33, -34.8988, 15.555] } { "index": {"_id": "2"}} - { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "vector": [-0.5, 100.0, -13, 14.8] } { "index": {"_id": "3"}} - { "vector": [0.196, 0.514, 0.039, 0.555, -0.042, 0.242, 0.463, -0.348, -0.08 , 0.442, -0.067, -0.05 , -0.001, 0.298, -0.377, 0.048, 0.307, 0.159, 0.278, 0.119, -0.057, 0.333, -0.289, -0.438, -0.014, 0.361, -0.169, 0.292, -0.229, 0.123, 0.031, -0.138, -0.139, 0.315, -0.216, 0.322, -0.445, -0.059, 0.071, 0.429, -0.602, -0.142, 0.11 , 0.192, 0.259, -0.241, 0.181, -0.166, 0.082, 0.107, -0.05 , 0.155, 0.011, 0.161, -0.486, 0.569, -0.489, 0.901, 0.208, 0.011, -0.209, -0.153, -0.27 , -0.013] } + { "vector": [0.5, 111.3, -13.0, 14.8] } - do: headers: @@ -692,14 +691,7 @@ setup: body: knn: field: vector - query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, - 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, - 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, - -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , - -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, - -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, - -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, - -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + query_vector: [-0.5, 90.0, -10, 14.8] k: 3 num_candidates: 3 @@ -713,6 +705,7 @@ setup: Content-Type: application/json search: rest_total_hits_as_int: true + index: int4_rescore_hnsw body: query: script_score: @@ -720,17 +713,228 @@ setup: script: source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" params: - query_vector: [0.128, 0.067, -0.08 , 0.395, -0.11 , -0.259, 0.473, -0.393, - 0.292, 0.571, -0.491, 0.444, -0.288, 0.198, -0.343, 0.015, - 0.232, 0.088, 0.228, 0.151, -0.136, 0.236, -0.273, -0.259, - -0.217, 0.359, -0.207, 0.352, -0.142, 0.192, -0.061, -0.17 , - -0.343, 0.189, -0.221, 0.32 , -0.301, -0.1 , 0.005, 0.232, - -0.344, 0.136, 0.252, 0.157, -0.13 , -0.244, 0.193, -0.034, - -0.12 , -0.193, -0.102, 0.252, -0.185, -0.167, -0.575, 0.582, - -0.426, 0.983, 0.212, 0.204, 0.03 , -0.276, -0.425, -0.158] + query_vector: [-0.5, 90.0, -10, 14.8] # Compare scores as hit IDs may change depending on how things are distributed - match: { hits.total: 3 } - match: { hits.hits.0._score: $rescore_score0 } - match: { hits.hits.1._score: $rescore_score1 } - match: { hits.hits.2._score: $rescore_score2 } +--- +"Test index configured rescore vector updateable and settable to 0": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + + - do: + indices.create: + index: int4_rescore_0_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.create: + index: int4_rescore_update_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + index_options: + type: int4_hnsw + rescore_vector: + oversample: 1 + + - do: + indices.put_mapping: + index: int4_rescore_update_hnsw + body: + properties: + vector: + type: dense_vector + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + indices.get_mapping: + index: int4_rescore_update_hnsw + + - match: { .int4_rescore_update_hnsw.mappings.properties.vector.index_options.rescore_vector.oversample: 0 } +--- +"Test index configured rescore vector score consistency": + - requires: + cluster_features: ["mapper.dense_vector.rescore_zero_vector"] + reason: Needs rescore_zero_vector feature + - skip: + features: "headers" + - do: + indices.create: + index: int4_rescore_zero_hnsw + body: + settings: + index: + number_of_shards: 1 + mappings: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + bulk: + index: int4_rescore_zero_hnsw + refresh: true + body: | + { "index": {"_id": "1"}} + { "vector": [230.0, 300.33, -34.8988, 15.555] } + { "index": {"_id": "2"}} + { "vector": [-0.5, 100.0, -13, 14.8] } + { "index": {"_id": "3"}} + { "vector": [0.5, 111.3, -13.0, 14.8] } + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: raw_score0 } + - set: { hits.hits.1._score: raw_score1 } + - set: { hits.hits.2._score: raw_score2 } + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + rescore_vector: + oversample: 2 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: override_score0 } + - set: { hits.hits.1._score: override_score1 } + - set: { hits.hits.2._score: override_score2 } + + - do: + indices.put_mapping: + index: int4_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 2 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + - match: { hits.total: 3 } + - set: { hits.hits.0._score: default_rescore0 } + - set: { hits.hits.1._score: default_rescore1 } + - set: { hits.hits.2._score: default_rescore2 } + + - do: + indices.put_mapping: + index: int4_rescore_zero_hnsw + body: + properties: + vector: + type: dense_vector + similarity: max_inner_product + dims: 4 + index_options: + type: int4_hnsw + rescore_vector: + oversample: 0 + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + query: + script_score: + query: {match_all: {} } + script: + source: "double similarity = dotProduct(params.query_vector, 'vector'); return similarity < 0 ? 1 / (1 + -1 * similarity) : similarity + 1" + params: + query_vector: [-0.5, 90.0, -10, 14.8] + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $override_score0 } + - match: { hits.hits.0._score: $default_rescore0 } + - match: { hits.hits.1._score: $override_score1 } + - match: { hits.hits.1._score: $default_rescore1 } + - match: { hits.hits.2._score: $override_score2 } + - match: { hits.hits.2._score: $default_rescore2 } + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + index: int4_rescore_zero_hnsw + body: + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8] + k: 3 + num_candidates: 3 + + # Compare scores as hit IDs may change depending on how things are distributed + - match: { hits.total: 3 } + - match: { hits.hits.0._score: $raw_score0 } + - match: { hits.hits.1._score: $raw_score1 } + - match: { hits.hits.2._score: $raw_score2 } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 322e988b4f42a..625a875362e8e 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -213,6 +213,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_AGGREGATE_METRIC_DOUBLE_BLOCK_8_19 = def(8_841_0_24); public static final TransportVersion INTRODUCE_FAILURES_LIFECYCLE_BACKPORT_8_19 = def(8_841_0_25); public static final TransportVersion INTRODUCE_FAILURES_DEFAULT_RETENTION_BACKPORT_8_19 = def(8_841_0_26); + public static final TransportVersion RESCORE_VECTOR_ALLOW_ZERO_BACKPORT_8_19 = def(8_841_0_27); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 0080572ec948c..8f17c16a16e6e 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -129,6 +129,7 @@ private static IndexVersion def(int id, Version luceneVersion) { public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY_BY_DEFAULT_BACKPORT = def(8_526_0_00, Version.LUCENE_9_12_1); public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY = def(8_527_0_00, Version.LUCENE_9_12_1); public static final IndexVersion ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS = def(8_528_0_00, Version.LUCENE_9_12_1); + public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = def(8_529_0_00, Version.LUCENE_9_12_1); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 17c4a62a9898f..bb987dc284bfe 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -18,6 +18,7 @@ import java.util.Set; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING; /** * Spec for mapper-related features. @@ -93,7 +94,8 @@ public Set getTestFeatures() { UKNOWN_FIELD_MAPPING_UPDATE_ERROR_MESSAGE, DateFieldMapper.INVALID_DATE_FIX, NPE_ON_DIMS_UPDATE_FIX, - RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING + RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING, + RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 2c4c843e429cb..026a8d195be7e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -95,6 +95,7 @@ import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; import static org.elasticsearch.index.IndexVersions.DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; +import static org.elasticsearch.index.IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; /** * A {@link FieldMapper} for indexing a dense vector of floats. @@ -116,13 +117,22 @@ private static boolean hasRescoreIndexVersion(IndexVersion version) { return version.onOrAfter(IndexVersions.ADD_RESCORE_PARAMS_TO_QUANTIZED_VECTORS); } + private static boolean allowsZeroRescore(IndexVersion version) { + return version.onOrAfter(RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS); + } + public static final IndexVersion MAGNITUDE_STORED_INDEX_VERSION = IndexVersions.V_7_5_0; public static final IndexVersion INDEXED_BY_DEFAULT_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION; public static final IndexVersion NORMALIZE_COSINE = IndexVersions.NORMALIZED_VECTOR_COSINE; public static final IndexVersion DEFAULT_TO_INT8 = DEFAULT_DENSE_VECTOR_TO_INT8_HNSW; public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersions.V_8_9_0; + public static final IndexVersion RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS = + IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS; public static final NodeFeature RESCORE_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature("mapper.dense_vector.rescore_vector"); + public static final NodeFeature RESCORE_ZERO_VECTOR_QUANTIZED_VECTOR_MAPPING = new NodeFeature( + "mapper.dense_vector.rescore_zero_vector" + ); public static final String CONTENT_TYPE = "dense_vector"; public static short MAX_DIMS_COUNT = 4096; // maximum allowed number of dimensions @@ -1293,7 +1303,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti } RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int8HnswIndexOptions(m, efConstruction, confidenceInterval, rescoreVector); @@ -1328,7 +1338,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti } RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int4HnswIndexOptions(m, efConstruction, confidenceInterval, rescoreVector); @@ -1371,7 +1381,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti } RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int8FlatIndexOptions(confidenceInterval, rescoreVector); @@ -1397,7 +1407,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti } RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new Int4FlatIndexOptions(confidenceInterval, rescoreVector); @@ -1428,7 +1438,7 @@ public IndexOptions parseIndexOptions(String fieldName, Map indexOpti int efConstruction = XContentMapValues.nodeIntegerValue(efConstructionNode); RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQHnswIndexOptions(m, efConstruction, rescoreVector); @@ -1449,7 +1459,7 @@ public boolean supportsDimension(int dims) { public IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap, IndexVersion indexVersion) { RescoreVector rescoreVector = null; if (hasRescoreIndexVersion(indexVersion)) { - rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap); + rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion); } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); return new BBQFlatIndexOptions(rescoreVector); @@ -1963,7 +1973,7 @@ record RescoreVector(float oversample) implements ToXContentObject { static final String NAME = "rescore_vector"; static final String OVERSAMPLE = "oversample"; - static RescoreVector fromIndexOptions(Map indexOptionsMap) { + static RescoreVector fromIndexOptions(Map indexOptionsMap, IndexVersion indexVersion) { Object rescoreVectorNode = indexOptionsMap.remove(NAME); if (rescoreVectorNode == null) { return null; @@ -1973,16 +1983,16 @@ static RescoreVector fromIndexOptions(Map indexOptionsMap) { if (oversampleNode == null) { throw new IllegalArgumentException("Invalid rescore_vector value. Missing required field " + OVERSAMPLE); } - return new RescoreVector((float) XContentMapValues.nodeDoubleValue(oversampleNode)); - } - - RescoreVector { - if (oversample < 1) { + float oversampleValue = (float) XContentMapValues.nodeDoubleValue(oversampleNode); + if (oversampleValue == 0 && allowsZeroRescore(indexVersion) == false) { throw new IllegalArgumentException("oversample must be greater than 1"); } - if (oversample > 10) { + if (oversampleValue < 1 && oversampleValue != 0) { + throw new IllegalArgumentException("oversample must be greater than 1 or exactly 0"); + } else if (oversampleValue > 10) { throw new IllegalArgumentException("oversample must be less than or equal to 10"); } + return new RescoreVector(oversampleValue); } @Override @@ -2149,7 +2159,7 @@ public Query createKnnQuery( } private boolean needsRescore(Float rescoreOversample) { - return rescoreOversample != null && isQuantized(); + return rescoreOversample != null && rescoreOversample > 0 && isQuantized(); } private boolean isQuantized() { diff --git a/server/src/main/java/org/elasticsearch/search/vectors/RescoreVectorBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/RescoreVectorBuilder.java index 0e110a57d1e14..9bbe54d65061d 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/RescoreVectorBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/RescoreVectorBuilder.java @@ -9,9 +9,11 @@ package org.elasticsearch.search.vectors; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; @@ -21,9 +23,12 @@ import java.io.IOException; import java.util.Objects; +import static org.elasticsearch.TransportVersions.RESCORE_VECTOR_ALLOW_ZERO_BACKPORT_8_19; + public class RescoreVectorBuilder implements Writeable, ToXContentObject { public static final ParseField OVERSAMPLE_FIELD = new ParseField("oversample"); + public static final float NO_OVERSAMPLE = 0.0F; public static final float MIN_OVERSAMPLE = 1.0F; private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "rescore_vector", @@ -39,8 +44,8 @@ public class RescoreVectorBuilder implements Writeable, ToXContentObject { public RescoreVectorBuilder(float numCandidatesFactor) { Objects.requireNonNull(numCandidatesFactor, "[" + OVERSAMPLE_FIELD.getPreferredName() + "] must be set"); - if (numCandidatesFactor < MIN_OVERSAMPLE) { - throw new IllegalArgumentException("[" + OVERSAMPLE_FIELD.getPreferredName() + "] must be >= " + MIN_OVERSAMPLE); + if (numCandidatesFactor < MIN_OVERSAMPLE && numCandidatesFactor != NO_OVERSAMPLE) { + throw new IllegalArgumentException("[" + OVERSAMPLE_FIELD.getPreferredName() + "] must be >= " + MIN_OVERSAMPLE + " or 0"); } this.oversample = numCandidatesFactor; } @@ -51,6 +56,17 @@ public RescoreVectorBuilder(StreamInput in) throws IOException { @Override public void writeTo(StreamOutput out) throws IOException { + // We don't want to serialize a `0` oversample to a node that doesn't know what to do with it. + if (oversample == NO_OVERSAMPLE && out.getTransportVersion().isPatchFrom(RESCORE_VECTOR_ALLOW_ZERO_BACKPORT_8_19) == false) { + throw new ElasticsearchStatusException( + "[rescore_vector] does not support a 0 for [" + + OVERSAMPLE_FIELD.getPreferredName() + + "] before version [" + + RESCORE_VECTOR_ALLOW_ZERO_BACKPORT_8_19.toReleaseVersion() + + "]", + RestStatus.BAD_REQUEST + ); + } out.writeFloat(oversample); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 27c64de80b43f..496f8b908fcbf 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -903,7 +903,7 @@ public void testRescoreVectorForNonQuantized() { } } - public void tesetRescoreVectorOldIndexVersion() { + public void testRescoreVectorOldIndexVersion() { IndexVersion incompatibleVersion = IndexVersionUtils.randomVersionBetween( random(), IndexVersions.V_8_0_0, @@ -927,6 +927,30 @@ public void tesetRescoreVectorOldIndexVersion() { } } + public void testRescoreZeroVectorOldIndexVersion() { + IndexVersion incompatibleVersion = IndexVersionUtils.randomVersionBetween( + random(), + IndexVersions.V_8_0_0, + IndexVersionUtils.getPreviousVersion(IndexVersions.RESCORE_PARAMS_ALLOW_ZERO_TO_QUANTIZED_VECTORS) + ); + for (String indexType : List.of("int8_hnsw", "int8_flat", "int4_hnsw", "int4_flat", "bbq_hnsw", "bbq_flat")) { + expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + incompatibleVersion, + fieldMapping( + b -> b.field("type", "dense_vector") + .field("index", true) + .startObject("index_options") + .field("type", indexType) + .field(DenseVectorFieldMapper.RescoreVector.NAME, Map.of("oversample", 0f)) + .endObject() + ) + ) + ); + } + } + public void testInvalidRescoreVector() { for (String indexType : List.of("int8_hnsw", "int8_flat", "int4_hnsw", "int4_flat", "bbq_hnsw", "bbq_flat")) { Exception e = expectThrows( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index e98038b7a0759..b6df46d17b598 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -50,7 +50,7 @@ public DenseVectorFieldTypeTests() { } private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() { - return new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.0F, 10.0F, false)); + return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false)); } private DenseVectorFieldMapper.IndexOptions randomIndexOptionsNonQuantized() { @@ -94,24 +94,24 @@ private DenseVectorFieldMapper.IndexOptions randomIndexOptionsAll() { } private DenseVectorFieldMapper.IndexOptions randomIndexOptionsHnswQuantized() { + return randomIndexOptionsHnswQuantized(randomBoolean() ? null : randomRescoreVector()); + } + + private DenseVectorFieldMapper.IndexOptions randomIndexOptionsHnswQuantized(DenseVectorFieldMapper.RescoreVector rescoreVector) { return randomFrom( new DenseVectorFieldMapper.Int8HnswIndexOptions( randomIntBetween(1, 100), randomIntBetween(1, 10_000), randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), - randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + rescoreVector ), new DenseVectorFieldMapper.Int4HnswIndexOptions( randomIntBetween(1, 100), randomIntBetween(1, 10_000), randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)), - randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) + rescoreVector ), - new DenseVectorFieldMapper.BBQHnswIndexOptions( - randomIntBetween(1, 100), - randomIntBetween(1, 10_000), - randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()) - ) + new DenseVectorFieldMapper.BBQHnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000), rescoreVector) ); } @@ -492,6 +492,40 @@ public void testRescoreOversampleModifiesNumCandidates() { checkRescoreQueryParameters(fieldType, 1000, 1000, 11.0F, OVERSAMPLE_LIMIT, OVERSAMPLE_LIMIT, 1000); } + public void testRescoreOversampleQueryOverrides() { + // verify we can override to `0` + DenseVectorFieldType fieldType = new DenseVectorFieldType( + "f", + IndexVersion.current(), + FLOAT, + 3, + true, + VectorSimilarity.COSINE, + randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.1f, 9.9f, false))), + Collections.emptyMap() + ); + Query query = fieldType.createKnnQuery(VectorData.fromFloats(new float[] { 1, 4, 10 }), 10, 100, 0f, null, null, null); + assertTrue(query instanceof ESKnnFloatVectorQuery); + + // verify we can override a `0` to a positive number + fieldType = new DenseVectorFieldType( + "f", + IndexVersion.current(), + FLOAT, + 3, + true, + VectorSimilarity.COSINE, + randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(0)), + Collections.emptyMap() + ); + query = fieldType.createKnnQuery(VectorData.fromFloats(new float[] { 1, 4, 10 }), 10, 100, 2f, null, null, null); + assertTrue(query instanceof RescoreKnnVectorQuery); + assertThat(((RescoreKnnVectorQuery) query).k(), equalTo(10)); + ESKnnFloatVectorQuery esKnnQuery = (ESKnnFloatVectorQuery) ((RescoreKnnVectorQuery) query).innerQuery(); + assertThat(esKnnQuery.kParam(), equalTo(20)); + + } + private static void checkRescoreQueryParameters( DenseVectorFieldType fieldType, int k, diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java index b3764d528ff0f..1a964c8c2b4f7 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java @@ -166,7 +166,7 @@ protected RescoreVectorBuilder randomRescoreVectorBuilder() { return null; } - return new RescoreVectorBuilder(randomFloatBetween(1.0f, 10.0f, false)); + return new RescoreVectorBuilder(randomBoolean() ? 0f : randomFloatBetween(1.0f, 10.0f, false)); } @Override @@ -181,9 +181,13 @@ protected void doAssertLuceneQuery(KnnVectorQueryBuilder queryBuilder, Query que k = context.requestSize() == null || context.requestSize() < 0 ? DEFAULT_SIZE : context.requestSize(); } if (queryBuilder.rescoreVectorBuilder() != null && isQuantizedElementType()) { - RescoreKnnVectorQuery rescoreQuery = (RescoreKnnVectorQuery) query; - assertEquals(k.intValue(), (rescoreQuery.k())); - query = rescoreQuery.innerQuery(); + if (queryBuilder.rescoreVectorBuilder().oversample() > 0) { + RescoreKnnVectorQuery rescoreQuery = (RescoreKnnVectorQuery) query; + assertEquals(k.intValue(), (rescoreQuery.k())); + query = rescoreQuery.innerQuery(); + } else { + assertFalse(query instanceof RescoreKnnVectorQuery); + } } switch (elementType()) { case FLOAT -> assertTrue(query instanceof ESKnnFloatVectorQuery);