|
31 | 31 | #include <pybind11/pybind11.h>
|
32 | 32 | #include <pybind11/stl.h>
|
33 | 33 |
|
| 34 | +// fmt |
| 35 | +#include <fmt/format.h> |
| 36 | + |
34 | 37 | // stl
|
35 | 38 | #include <span>
|
36 | 39 |
|
@@ -88,6 +91,75 @@ void add_build_specialization(py::class_<svs::DynamicVamana>& index) {
|
88 | 91 | );
|
89 | 92 | }
|
90 | 93 |
|
| 94 | +///// |
| 95 | +///// Build from file (data loader) |
| 96 | +///// |
| 97 | + |
| 98 | +template <typename Q, typename T, typename Dist, size_t N> |
| 99 | +svs::DynamicVamana dynamic_vamana_build_uncompressed( |
| 100 | + const svs::index::vamana::VamanaBuildParameters& parameters, |
| 101 | + svs::VectorDataLoader<T, N, RebindAllocator<T>> data_loader, |
| 102 | + std::span<const size_t> ids, |
| 103 | + svs::DistanceType distance_type, |
| 104 | + size_t num_threads |
| 105 | +) { |
| 106 | + return svs::DynamicVamana::build<Q>( |
| 107 | + parameters, |
| 108 | + std::move(data_loader), |
| 109 | + ids, |
| 110 | + distance_type, |
| 111 | + num_threads |
| 112 | + ); |
| 113 | +} |
| 114 | + |
// Dispatcher type used to build a DynamicVamana index from a data loader.
//
// The first template argument is the return type; the remaining ones are the argument
// slots, in order:
//   0: build parameters
//   1: type-erased data loader
//   2: ids
//   3: distance type
//   4: number of threads
using DynamicVamanaBuildFromFileDispatcher = svs::lib::Dispatcher<
    svs::DynamicVamana,
    const svs::index::vamana::VamanaBuildParameters&,
    UnspecializedVectorDataLoader,
    std::span<const size_t>,
    svs::DistanceType,
    size_t>;
| 122 | + |
| 123 | +DynamicVamanaBuildFromFileDispatcher dynamic_vamana_build_from_file_dispatcher() { |
| 124 | + auto dispatcher = DynamicVamanaBuildFromFileDispatcher{}; |
| 125 | + // Register uncompressed specializations (Dynamic dimensionality only, similar to tests) |
| 126 | + for_standard_specializations([&]<typename Q, typename T, typename D, size_t N>() { |
| 127 | + // Only register when N is Dynamic (compile-time tag) - the pattern in static code |
| 128 | + // registers all; here we directly register. |
| 129 | + auto method = &dynamic_vamana_build_uncompressed<Q, T, D, N>; |
| 130 | + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); |
| 131 | + }); |
| 132 | + return dispatcher; |
| 133 | +} |
| 134 | + |
| 135 | +svs::DynamicVamana dynamic_vamana_build_from_file( |
| 136 | + const svs::index::vamana::VamanaBuildParameters& parameters, |
| 137 | + UnspecializedVectorDataLoader data_loader, |
| 138 | + const py_contiguous_array_t<size_t>& py_ids, |
| 139 | + svs::DistanceType distance_type, |
| 140 | + size_t num_threads |
| 141 | +) { |
| 142 | + auto ids = std::span<const size_t>(py_ids.data(), py_ids.size()); |
| 143 | + return dynamic_vamana_build_from_file_dispatcher().invoke( |
| 144 | + parameters, std::move(data_loader), ids, distance_type, num_threads |
| 145 | + ); |
| 146 | +} |
| 147 | + |
// Docstring template for the Python `build` overload that takes a data loader.
//
// The text is used as a format string (see its use with `fmt::format`): it contains
// exactly one `{}` replacement field, which receives the generated listing of compiled
// specializations. Any literal brace added to this text must be escaped as `{{` / `}}`.
constexpr std::string_view DYNAMIC_VAMANA_BUILD_FROM_FILE_DOCSTRING_PROTO = R"(
Construct a DynamicVamana index using a data loader, returning the index.

Args:
    parameters: Build parameters controlling graph construction.
    data_loader: Data loader (e.g., a VectorDataLoader instance).
    ids: Vector of ids to assign to each row in the dataset; must match dataset length and contain unique values.
    distance_type: The similarity function to use for this index.
    num_threads: Number of threads to use for index construction. Default: 1.

Specializations compiled into the binary are listed below.

{} # (Method listing auto-generated)
)";
| 162 | + |
91 | 163 | template <typename ElementType>
|
92 | 164 | void add_points(
|
93 | 165 | svs::DynamicVamana& index,
|
@@ -301,6 +373,31 @@ void wrap(py::module& m) {
|
301 | 373 | // Index building.
|
302 | 374 | add_build_specialization<float>(vamana);
|
303 | 375 |
|
| 376 | + // Build from file / data loader (dynamic docstring) |
| 377 | + { |
| 378 | + auto dispatcher = dynamic_vamana_build_from_file_dispatcher(); |
| 379 | + std::string dynamic; |
| 380 | + for (size_t i = 0; i < dispatcher.size(); ++i) { |
| 381 | + fmt::format_to( |
| 382 | + std::back_inserter(dynamic), |
| 383 | + R"(Method {}:\n - data_loader: {}\n - distance: {}\n)", |
| 384 | + i, |
| 385 | + dispatcher.description(i, 1), |
| 386 | + dispatcher.description(i, 3) |
| 387 | + ); |
| 388 | + } |
| 389 | + vamana.def_static( |
| 390 | + "build", |
| 391 | + &dynamic_vamana_build_from_file, |
| 392 | + py::arg("parameters"), |
| 393 | + py::arg("data_loader"), |
| 394 | + py::arg("ids"), |
| 395 | + py::arg("distance_type"), |
| 396 | + py::arg("num_threads") = 1, |
| 397 | + fmt::format(DYNAMIC_VAMANA_BUILD_FROM_FILE_DOCSTRING_PROTO, dynamic).c_str() |
| 398 | + ); |
| 399 | + } |
| 400 | + |
304 | 401 | // Index modification.
|
305 | 402 | add_points_specialization<float>(vamana);
|
306 | 403 |
|
|
0 commit comments