Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 52 additions & 9 deletions faiss/IndexFlat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,15 +663,44 @@ void IndexFlatPanorama::reconstruct_n(idx_t i, idx_t n, float* recons) const {
Index::reconstruct_n(i, n, recons);
}

size_t IndexFlatPanorama::remove_ids(const IDSelector& /* sel */) {
FAISS_THROW_MSG("remove_ids not implemented for IndexFlatPanorama");
return 0;
size_t IndexFlatPanorama::remove_ids(const IDSelector& sel) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idx_t j = 0;
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
pano.copy_entry(
codes.data(),
codes.data(),
cum_sums.data(),
cum_sums.data(),
j,
i);
}
j++;
}
}
size_t nremove = ntotal - j;
if (nremove > 0) {
ntotal = j;
size_t num_batches = (ntotal + batch_size - 1) / batch_size;
codes.resize(num_batches * batch_size * code_size);
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
}
return nremove;
}

void IndexFlatPanorama::merge_from(
Index& /* otherIndex */,
idx_t /* add_id */) {
FAISS_THROW_MSG("merge_from not implemented for IndexFlatPanorama");
void IndexFlatPanorama::merge_from(Index& otherIndex, idx_t add_id) {
FAISS_THROW_IF_NOT_MSG(add_id == 0, "cannot set ids in FlatPanorama index");
check_compatible_for_merge(otherIndex);
IndexFlatPanorama* other = static_cast<IndexFlatPanorama*>(&otherIndex);

std::vector<float> buffer(other->ntotal * code_size);
otherIndex.reconstruct_n(0, other->ntotal, buffer.data());

add(other->ntotal, buffer.data());
other->reset();
}

void IndexFlatPanorama::add_sa_codes(
Expand All @@ -681,7 +710,21 @@ void IndexFlatPanorama::add_sa_codes(
FAISS_THROW_MSG("add_sa_codes not implemented for IndexFlatPanorama");
}

void IndexFlatPanorama::permute_entries(const idx_t* /* perm */) {
FAISS_THROW_MSG("permute_entries not implemented for IndexFlatPanorama");
void IndexFlatPanorama::permute_entries(const idx_t* perm) {
MaybeOwnedVector<uint8_t> new_codes(codes.size());
std::vector<float> new_cum_sums(cum_sums.size());

for (idx_t i = 0; i < ntotal; i++) {
pano.copy_entry(
new_codes.data(),
codes.data(),
new_cum_sums.data(),
cum_sums.data(),
i,
perm[i]);
}

std::swap(codes, new_codes);
std::swap(cum_sums, new_cum_sums);
}
} // namespace faiss
43 changes: 43 additions & 0 deletions faiss/impl/Panorama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,47 @@ void Panorama::reconstruct(idx_t key, float* recons, const uint8_t* codes_base)
memcpy(dest, src, copy_size);
}
}

void Panorama::copy_entry(
        uint8_t* dest_codes,
        uint8_t* src_codes,
        float* dest_cum_sums,
        float* src_cum_sums,
        size_t dest_idx,
        size_t src_idx) const {
    // Entries are stored batch-major; inside a batch, each level holds
    // batch_size contiguous slices of that level's width. Locate the
    // batch base and the slot within the batch for both endpoints.
    const size_t src_slot = src_idx % batch_size;
    const size_t dst_slot = dest_idx % batch_size;

    const uint8_t* src_batch =
            src_codes + (src_idx / batch_size) * batch_size * code_size;
    uint8_t* dst_batch =
            dest_codes + (dest_idx / batch_size) * batch_size * code_size;
    const float* src_csums = src_cum_sums +
            (src_idx / batch_size) * batch_size * (n_levels + 1);
    float* dst_csums = dest_cum_sums +
            (dest_idx / batch_size) * batch_size * (n_levels + 1);

    for (size_t level = 0; level < n_levels; level++) {
        // The final level may be narrower when code_size is not a
        // multiple of level_width.
        const size_t width =
                std::min(level_width, code_size - level * level_width);
        const size_t level_base = level * level_width * batch_size;

        // Copy this level's slice of the code.
        memcpy(dst_batch + level_base + dst_slot * width,
               src_batch + level_base + src_slot * width,
               width);

        // Copy the matching cumulative-sum entry.
        // NOTE(review): cum_sums reserves (n_levels + 1) slots per entry
        // but only n_levels are copied here — confirm the final slot is
        // invariant (e.g. always 0).
        dst_csums[level * batch_size + dst_slot] =
                src_csums[level * batch_size + src_slot];
    }
}
} // namespace faiss
9 changes: 9 additions & 0 deletions faiss/impl/Panorama.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ struct Panorama {
void compute_query_cum_sums(const float* query, float* query_cum_sums)
const;

/// Copy single entry (code and cum_sum) from one location to another.
void copy_entry(
uint8_t* dest_codes,
uint8_t* src_codes,
float* dest_cum_sums,
float* src_cum_sums,
size_t dest_idx,
size_t src_idx) const;

/// Panorama's core progressive filtering algorithm:
/// Process vectors in batches for cache efficiency. For each batch:
/// 1. Apply ID selection filter and initialize distances
Expand Down
99 changes: 94 additions & 5 deletions tests/test_flat_l2_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import unittest
import tempfile
import os

import faiss
import numpy as np
Expand Down Expand Up @@ -313,7 +312,7 @@ def test_selector_excludes_all(self):
def test_batch_boundaries(self):
"""Test correctness at various batch size boundaries"""
d, nt, nq, k = 128, 10000, 10, 15
# random train not needed for Flat indices
# random train not needed for Flat indexes
xq = np.random.rand(nq, d).astype("float32")

# Use index's batch_size
Expand Down Expand Up @@ -472,6 +471,98 @@ def test_reconstruct(self):
start_idx, n_vectors = 120, 10
vn_panorama = index_panorama.reconstruct_n(start_idx, n_vectors)
np.testing.assert_array_equal(xb[start_idx:start_idx + n_vectors], vn_panorama)

def test_remove_ids_then_add(self):
    """Test removing vectors with remove_ids() then adding more vectors."""
    d, nb, nt, nq, nlevels, k = 128, 500000, 0, 10, 9, 15
    _, xb, xq = self.generate_data(d, nt, nb, nq, seed=2026)

    xb1 = xb[:nb // 2]
    xb2 = xb[nb // 2:]

    index_regular = self.create_flat(d, xb1)
    index_panorama = self.create_panorama(d, nlevels, xb1)

    # Remove every even ID actually present in the index. The indexes
    # hold len(xb1) vectors, so generate IDs only up to ntotal —
    # np.arange(0, nb, 2) would silently include nonexistent IDs.
    ids_to_remove = np.arange(0, len(xb1), 2, dtype=np.int64)

    nremove_regular = index_regular.remove_ids(ids_to_remove)
    nremove_panorama = index_panorama.remove_ids(ids_to_remove)

    # Verify same number of IDs removed
    self.assertEqual(nremove_regular, nremove_panorama)

    # Verify ntotal updated correctly
    self.assertEqual(index_regular.ntotal, index_panorama.ntotal)

    # Verify search results match between regular and panorama
    D_reg_1, I_reg_1 = index_regular.search(xq, k)
    D_pan_1, I_pan_1 = index_panorama.search(xq, k)
    self.assert_search_results_equal(D_reg_1, I_reg_1, D_pan_1, I_pan_1)

    # Second add and search
    index_regular.add(xb2)
    index_panorama.add(xb2)

    # Verify ntotal updated correctly
    self.assertEqual(index_regular.ntotal, index_panorama.ntotal)

    # Verify second search results match between regular and panorama
    D_reg_2, I_reg_2 = index_regular.search(xq, k)
    D_pan_2, I_pan_2 = index_panorama.search(xq, k)
    self.assert_search_results_equal(D_reg_2, I_reg_2, D_pan_2, I_pan_2)

def test_merge_from(self):
    """Test merging indexes with merge_from()."""
    d, nb, nt, nq, nlevels, k, batch_size = 128, 500000, 0, 10, 9, 15, 16
    _, xb, xq = self.generate_data(d, nt, nb, nq, seed=2027)

    # Two disjoint halves of the data, one index pair per half. The two
    # Panorama indexes deliberately use different nlevels / batch_size
    # to exercise merging across layouts.
    half = nb // 2
    first_half, second_half = xb[:half], xb[half:]

    index1_regular = self.create_flat(d, first_half)
    index2_regular = self.create_flat(d, second_half)

    index1_panorama = self.create_panorama(d, nlevels, first_half, batch_size)
    index2_panorama = self.create_panorama(
        d, nlevels * 2, second_half, batch_size // 2)

    # Merge the second index of each pair into the first.
    index1_regular.merge_from(index2_regular, 0)
    index1_panorama.merge_from(index2_panorama, 0)

    # Sizes must agree after the merge (source indexes are emptied).
    self.assertEqual(index1_regular.ntotal, index1_panorama.ntotal)
    self.assertEqual(index2_regular.ntotal, index2_panorama.ntotal)

    # Search on the merged index must match the flat baseline.
    D_regular, I_regular = index1_regular.search(xq, k)
    D_panorama, I_panorama = index1_panorama.search(xq, k)
    self.assert_search_results_equal(D_regular, I_regular, D_panorama, I_panorama)

def test_permute_entries(self):
    """Test permuting entries with permute_entries()."""
    d, nb, nt, nq, nlevels, k = 128, 500000, 0, 10, 8, 15
    _, xb, xq = self.generate_data(d, nt, nb, nq, seed=2028)

    index_regular = self.create_flat(d, xb)
    index_panorama = self.create_panorama(d, nlevels, xb)

    # Apply the same random permutation to both indexes.
    np.random.seed(1234)
    perm = np.random.permutation(nb).astype(np.int64)
    index_regular.permute_entries(perm)
    index_panorama.permute_entries(perm)

    # After permutation both indexes must still agree on search results.
    D_regular, I_regular = index_regular.search(xq, k)
    D_panorama, I_panorama = index_panorama.search(xq, k)
    self.assert_search_results_equal(D_regular, I_regular, D_panorama, I_panorama)

def test_serialization(self):
"""Test that writing and reading Panorama indexes preserves search results"""
Expand All @@ -481,10 +572,8 @@ def test_serialization(self):
index = self.create_panorama(d, nlevels, xb)

D_before, I_before = index.search(xq, k)
faiss.write_index(index, "index.bin")
index_after = faiss.read_index("index.bin")
index_after = faiss.deserialize_index(faiss.serialize_index(index))
D_after, I_after = index_after.search(xq, k)
os.unlink("index.bin")

np.testing.assert_array_equal(I_before, I_after)
np.testing.assert_array_equal(D_before, D_after)
Expand Down
5 changes: 1 addition & 4 deletions tests/test_ivf_flat_panorama.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import unittest
import tempfile
import os

import faiss
import numpy as np
Expand Down Expand Up @@ -535,10 +534,8 @@ def test_serialization(self):
index = self.create_panorama(d, nlist, nlevels, xt, xb, nprobe=32)

D_before, I_before = index.search(xq, k)
faiss.write_index(index, "index.bin")
index_after = faiss.read_index("index.bin")
index_after = faiss.deserialize_index(faiss.serialize_index(index))
D_after, I_after = index_after.search(xq, k)
os.unlink("index.bin")

np.testing.assert_array_equal(I_before, I_after)
np.testing.assert_array_equal(D_before, D_after)
Expand Down