Skip to content

Commit 5826f4f

Browse files
authored
IVF Index Support in SVS (#156)
This update introduces IVF (Inverted File) index support in SVS, allowing for index construction using either standard one-level clustering or a faster two-level hierarchical clustering approach. The clustering algorithm is optimized to utilize AMX (Advanced Matrix Extensions) on supported Intel® Xeon® systems, enhancing performance on compatible hardware. Additionally, support for the BF16 (bfloat16) data type has been introduced, broadening the range of data formats that can be efficiently processed.

## Tasks

- [x] IVF (Inverted File) index support: Adds both standard one-level and fast two-level hierarchical clustering methods for index construction.
- [x] Optimized Clustering: Leverages AMX (Advanced Matrix Extensions) for improved performance on supported Intel® Xeon® hardware.
- [x] BF16 Data Type Support: Enables efficient processing of bfloat16-formatted data.
- [x] Python Bindings: Extends IVF functionality to Python.
- [x] Benchmarks and C++ Tests: Benchmarks and C++ test coverage are enabled.
1 parent 89b3867 commit 5826f4f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+7121
-7
lines changed

.github/.licenserc.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ header:
4646
- '.github/renovate.json'
4747
- '.github/CODEOWNERS'
4848
- 'cmake/mkl_functions'
49+
- 'cmake/mkl_functions_ivf'
4950
- 'cmake/patches/tomlplusplus_v330.patch'
5051
- 'docker/x86_64/manylinux2014/oneAPI.repo'
5152
- 'docs/cpp/index/loader-compatibility.csv'

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ usr/
44
wheelhouse/
55

66
# Bundled test data
7-
/data/
7+
/data/temp
88

99
# Misc tool related files
1010
*.swp

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ include("cmake/fmt.cmake")
7777
include("cmake/spdlog.cmake")
7878
include("cmake/toml.cmake")
7979

80+
# IVF requires Intel(R) MKL support.
# When the experimental IVF option is on, pull in the MKL setup and advertise its
# availability to consumers through the SVS_HAVE_MKL preprocessor definition.
if(SVS_EXPERIMENTAL_ENABLE_IVF)
    include("cmake/mkl.cmake")
    # Use the dedicated definitions command rather than a raw "-D" compile option so
    # the define is tracked as a definition and spelled correctly for every compiler.
    target_compile_definitions(${SVS_LIB} INTERFACE SVS_HAVE_MKL=1)
endif()
85+
8086
add_library(svs_x86_options_base INTERFACE)
8187
add_library(svs::x86_options_base ALIAS svs_x86_options_base)
8288
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")

THIRD-PARTY-PROGRAMS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ Please also refer to the file .github/CONTRIBUTING.md, which clarifies licensing
159159
external contributions to this project including patches, pull requests, etc.
160160

161161
--------------------------------------------------------------------------------
162-
7. MKL (cmake/mkl.cmake, https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html)
162+
7. Intel(R) MKL (cmake/mkl.cmake, https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html)
163163

164164
Copyright (c) Intel Corporation, All rights reserved.
165165

benchmark/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ set(SHARED_LIBRARY_FILES
4848
src/inverted/memory/executables/memory_test.cpp
4949
)
5050

51+
# ivf
52+
if (SVS_EXPERIMENTAL_ENABLE_IVF)
53+
list(APPEND SHARED_LIBRARY_FILES
54+
src/ivf/uncompressed.cpp
55+
src/ivf/search.cpp
56+
src/ivf/build.cpp
57+
src/ivf/test.cpp
58+
)
59+
endif()
60+
61+
5162
add_library(svs_benchmark_library SHARED ${SHARED_LIBRARY_FILES})
5263
target_include_directories(svs_benchmark_library PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include)
5364

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
/*
2+
* Copyright 2025 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
// svs-benchmark
20+
#include "svs-benchmark/benchmark.h"
21+
#include "svs-benchmark/build.h"
22+
#include "svs-benchmark/datasets.h"
23+
#include "svs-benchmark/index_traits.h"
24+
#include "svs-benchmark/ivf/search.h"
25+
#include "svs-benchmark/search.h"
26+
27+
// svs
28+
#include "svs/orchestrators/ivf.h"
29+
30+
// stl
31+
#include <filesystem>
32+
#include <memory>
33+
#include <optional>
34+
#include <span>
35+
#include <string_view>
36+
#include <vector>
37+
38+
namespace svsbenchmark::ivf {
39+
40+
/// Empty tag type used to select the static IVF benchmark in overloads and traits.
struct StaticBenchmark {};

// Forward declarations
struct BuildJob;

/// Trait mapping a benchmark tag type to the job type that configures it.
/// The primary template is intentionally left undefined; only explicit
/// specializations expose a nested `type`.
template <typename T> struct AssociatedJob;

/// The static benchmark is configured by a `BuildJob`.
template <> struct AssociatedJob<StaticBenchmark> {
    using type = BuildJob;
};

/// Shorthand for `typename AssociatedJob<T>::type`.
template <typename T> using associated_job_t = typename AssociatedJob<T>::type;

// Job names
/// Name of the benchmark selected by the `StaticBenchmark` tag.
/// @return The string literal "ivf_static_build".
inline constexpr std::string_view benchmark_name(StaticBenchmark) {
    return "ivf_static_build";
}
57+
58+
// Entry-point for registering the static index building executable.
59+
std::unique_ptr<Benchmark> static_workflow();
60+
61+
// Shared struct between the static and dynamic paths.
62+
struct BuildJobBase {
63+
public:
64+
// A descriptive name for this workload.
65+
std::string description_;
66+
67+
// The dataset to load
68+
Dataset dataset_;
69+
70+
// Paths
71+
std::filesystem::path data_;
72+
std::filesystem::path queries_;
73+
74+
// The number of queries (taken form queries) to use in the training set.
75+
size_t queries_in_training_set_;
76+
77+
// Dataset Parameters
78+
svs::DataType data_type_;
79+
svs::DataType query_type_;
80+
svs::DistanceType distance_;
81+
Extent ndims_;
82+
83+
// Build Parameters
84+
svs::index::ivf::IVFBuildParameters build_parameters_;
85+
size_t num_threads_;
86+
87+
public:
88+
///// Contructor
89+
BuildJobBase(
90+
std::string_view description,
91+
svsbenchmark::Dataset dataset,
92+
std::filesystem::path data,
93+
std::filesystem::path queries,
94+
size_t queries_in_training_set,
95+
svs::DataType data_type,
96+
svs::DataType query_type,
97+
svs::DistanceType distance,
98+
size_t ndims,
99+
const svs::index::ivf::IVFBuildParameters& build_parameters,
100+
size_t num_threads
101+
)
102+
: description_{description}
103+
, dataset_{dataset}
104+
, data_{std::move(data)}
105+
, queries_{std::move(queries)}
106+
, queries_in_training_set_{queries_in_training_set}
107+
, data_type_{data_type}
108+
, query_type_{query_type}
109+
, distance_{distance}
110+
, ndims_{ndims}
111+
, build_parameters_{build_parameters}
112+
, num_threads_{num_threads} {}
113+
114+
// Compatibility with `ExpectedResults`.
115+
const svs::index::ivf::IVFBuildParameters& get_build_parameters() const {
116+
return build_parameters_;
117+
}
118+
svs::DistanceType get_distance() const { return distance_; }
119+
120+
// Return an example BuildJob that can be used to generate sample config files.
121+
static BuildJobBase example() {
122+
return BuildJobBase(
123+
"example index build",
124+
Dataset::example(),
125+
"data.fvecs",
126+
"queries.fvecs",
127+
5000,
128+
svs::DataType::float32,
129+
svs::DataType::float32,
130+
svs::DistanceType::L2,
131+
svs::Dynamic,
132+
svs::index::ivf::IVFBuildParameters(128, 10000, 10, false, 0.1),
133+
8
134+
);
135+
}
136+
137+
svs::lib::SaveTable
138+
to_toml(std::string_view schema, const svs::lib::Version& version) const {
139+
return svs::lib::SaveTable(
140+
schema,
141+
version,
142+
{SVS_LIST_SAVE_(description),
143+
SVS_LIST_SAVE_(dataset),
144+
SVS_LIST_SAVE_(data),
145+
SVS_LIST_SAVE_(queries),
146+
SVS_LIST_SAVE_(queries_in_training_set),
147+
SVS_LIST_SAVE_(data_type),
148+
SVS_LIST_SAVE_(query_type),
149+
SVS_LIST_SAVE_(distance),
150+
SVS_LIST_SAVE_(ndims),
151+
SVS_LIST_SAVE_(build_parameters),
152+
SVS_LIST_SAVE_(num_threads)}
153+
);
154+
}
155+
156+
static BuildJobBase from_toml(
157+
const svs::lib::ContextFreeLoadTable& table,
158+
const std::optional<std::filesystem::path>& root
159+
) {
160+
namespace lib = svs::lib;
161+
return BuildJobBase(
162+
SVS_LOAD_MEMBER_AT_(table, description),
163+
SVS_LOAD_MEMBER_AT_(table, dataset, root),
164+
svsbenchmark::extract_filename(table, "data", root),
165+
svsbenchmark::extract_filename(table, "queries", root),
166+
SVS_LOAD_MEMBER_AT_(table, queries_in_training_set),
167+
SVS_LOAD_MEMBER_AT_(table, data_type),
168+
SVS_LOAD_MEMBER_AT_(table, query_type),
169+
SVS_LOAD_MEMBER_AT_(table, distance),
170+
SVS_LOAD_MEMBER_AT_(table, ndims),
171+
SVS_LOAD_MEMBER_AT_(table, build_parameters),
172+
SVS_LOAD_MEMBER_AT_(table, num_threads)
173+
);
174+
}
175+
};
176+
177+
// Parsed setup for a static index build job.
178+
struct BuildJob : public BuildJobBase {
179+
public:
180+
// Paths
181+
std::filesystem::path groundtruth_;
182+
// Preset search parameters
183+
std::vector<svs::index::ivf::IVFSearchParameters> preset_parameters_;
184+
// Post-build validation parameters.
185+
svsbenchmark::search::SearchParameters search_parameters_;
186+
// Directory to save the built index.
187+
// An empty optional implies no saving.
188+
std::optional<std::filesystem::path> save_directory_;
189+
190+
public:
191+
template <typename... Args>
192+
BuildJob(
193+
std::filesystem::path groundtruth,
194+
std::vector<svs::index::ivf::IVFSearchParameters> preset_parameters,
195+
svsbenchmark::search::SearchParameters search_parameters,
196+
std::optional<std::filesystem::path> save_directory,
197+
Args&&... args
198+
)
199+
: BuildJobBase(std::forward<Args>(args)...)
200+
, groundtruth_{std::move(groundtruth)}
201+
, preset_parameters_{std::move(preset_parameters)}
202+
, search_parameters_{std::move(search_parameters)}
203+
, save_directory_{std::move(save_directory)} {}
204+
205+
// Return an example BuildJob that can be used to generate sample config files.
206+
static BuildJob example() {
207+
return BuildJob(
208+
"groundtruth.ivecs", // groundtruth
209+
{{10, 1.0}, {10, 4.0}, {50, 1.0}}, // preset_parameters
210+
svsbenchmark::search::SearchParameters::example(), // search_parameters
211+
std::nullopt, // save_directory
212+
BuildJobBase::example() // base-class
213+
);
214+
}
215+
216+
// Compatibility with abstract search-space.
217+
std::vector<svs::index::ivf::IVFSearchParameters> get_search_configs() const {
218+
return preset_parameters_;
219+
}
220+
const svsbenchmark::search::SearchParameters& get_search_parameters() const {
221+
return search_parameters_;
222+
}
223+
224+
template <typename F>
225+
auto invoke(F&& f, const Checkpoint& SVS_UNUSED(checkpoint)) const {
226+
return f(dataset_, query_type_, data_type_, distance_, ndims_, *this);
227+
}
228+
229+
// Save the index if the `save_directory` field is non-empty.
230+
template <typename Index> void maybe_save_index(Index& index) const {
231+
if (!save_directory_) {
232+
return;
233+
}
234+
const auto& root = save_directory_.value();
235+
svs::lib::save_to_disk(index, root / "clustering");
236+
}
237+
238+
static constexpr svs::lib::Version save_version = svs::lib::Version(0, 0, 0);
239+
static constexpr std::string_view serialization_schema = "benchmark_ivf_build_job";
240+
241+
// Save the BuildJob to a TOML table.
242+
svs::lib::SaveTable save() const {
243+
// Get a base table.
244+
auto table = BuildJobBase::to_toml(serialization_schema, save_version);
245+
246+
// Append the extra information needed by the static BuildJob.
247+
SVS_INSERT_SAVE_(table, groundtruth);
248+
SVS_INSERT_SAVE_(table, preset_parameters);
249+
SVS_INSERT_SAVE_(table, search_parameters);
250+
table.insert("save_directory", svs::lib::save(save_directory_.value_or("")));
251+
return table;
252+
}
253+
254+
// Load a BuildJob from a TOML table.
255+
static BuildJob load(
256+
const svs::lib::ContextFreeLoadTable& table,
257+
const std::optional<std::filesystem::path>& root,
258+
svsbenchmark::SaveDirectoryChecker& checker
259+
) {
260+
return BuildJob(
261+
svsbenchmark::extract_filename(table, "groundtruth", root),
262+
SVS_LOAD_MEMBER_AT_(table, preset_parameters),
263+
SVS_LOAD_MEMBER_AT_(table, search_parameters),
264+
checker.extract(table.unwrap(), "save_directory"),
265+
BuildJobBase::from_toml(table, root)
266+
);
267+
}
268+
};
269+
270+
// Dispatchers
271+
using StaticBuildDispatcher = svs::lib::Dispatcher<
272+
toml::table,
273+
svsbenchmark::Dataset,
274+
svs::DataType,
275+
svs::DataType,
276+
svs::DistanceType,
277+
Extent,
278+
const BuildJob&>;
279+
280+
} // namespace svsbenchmark::ivf
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Copyright 2025 Intel Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// svs-benchmark
18+
#include "svs-benchmark/benchmark.h"
19+
20+
// svs
21+
#include "svs/core/distance.h"
22+
#include "svs/index/ivf/common.h"
23+
24+
// stl
25+
#include <initializer_list>
26+
27+
namespace svsbenchmark::ivf {
28+
29+
// Test Routines
30+
SVS_BENCHMARK_FOR_TESTS_ONLY inline search::SearchParameters test_search_parameters() {
31+
return search::SearchParameters{10, {0.5, 0.8, 0.9}};
32+
}
33+
34+
// IVF search configurations used by the test routines: two entries, {10, 1.0} and
// {50, 1.0}.
// The vector is list-initialized directly with its two elements, matching the
// `{{10, 1.0}, ...}` form used for the preset parameters of the example build job,
// instead of wrapping the list in an extra layer of parentheses and braces.
SVS_BENCHMARK_FOR_TESTS_ONLY inline std::vector<svs::index::ivf::IVFSearchParameters>
test_search_configs() {
    return std::vector<svs::index::ivf::IVFSearchParameters>{{10, 1.0}, {50, 1.0}};
}
38+
39+
} // namespace svsbenchmark::ivf

0 commit comments

Comments
 (0)