From efe44b0c116135740e2fd4ac525d9e8935618adc Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 11 Jul 2023 14:12:44 +0000 Subject: [PATCH 1/6] replace element_neighbors_locks_ lockes with read locks where possible --- src/VecSim/algorithms/hnsw/hnsw.h | 62 ++++++++++--------- .../algorithms/hnsw/hnsw_batch_iterator.h | 4 +- src/VecSim/index_factories/hnsw_factory.cpp | 4 +- tests/unit/test_allocator.cpp | 2 +- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index c4726ef66..8d5f20318 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -47,6 +47,9 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) +using elem_mutex_t = std::shared_mutex; +using elem_write_mutex_t = std::unique_lock; +using elem_read_mutex_t = std::shared_lock; // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph @@ -113,7 +116,7 @@ class HNSWIndex : public VecSimIndexAbstract, mutable VisitedNodesHandlerPool visited_nodes_handler_pool; mutable std::shared_mutex index_data_guard_; - mutable vecsim_stl::vector element_neighbors_locks_; + mutable vecsim_stl::vector element_neighbors_locks_; #ifdef BUILD_TESTS #include "VecSim/algorithms/hnsw/hnsw_base_tests_friends.h" @@ -169,8 +172,8 @@ class HNSWIndex : public VecSimIndexAbstract, const std::pair &neighbor_data, idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - std::unique_lock &node_lock, - std::unique_lock &neighbor_lock); + elem_write_mutex_t &node_lock, + elem_write_mutex_t &neighbor_lock); inline idType mutuallyConnectNewElement(idType new_node_id, candidatesMaxHeap &top_candidates, size_t level); @@ -234,8 +237,8 @@ class HNSWIndex : public VecSimIndexAbstract, inline auto safeGetEntryPointState() const; inline void lockIndexDataGuard() const; inline void unlockIndexDataGuard() const; - inline void lockNodeLinks(idType node_id) const; - inline void unlockNodeLinks(idType node_id) const; + inline void lockForReadNodeLinks(idType node_id) const; + inline void unlockForReadNodeLinks(idType node_id) const; inline VisitedNodesHandler *getVisitedList() const; inline void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const; VecSimIndexInfo info() const override; @@ -515,13 +518,13 @@ void HNSWIndex::unlockIndexDataGuard() const { } template -void HNSWIndex::lockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].lock(); +void HNSWIndex::lockForReadNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].lock_shared(); } template -void HNSWIndex::unlockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].unlock(); +void HNSWIndex::unlockForReadNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].unlock_shared(); } template @@ -585,7 +588,7 @@ DistType HNSWIndex::processCandidate( tag_t *elements_tags, vecsim_stl::abstract_priority_queue &top_candidates, candidatesMaxHeap &candidate_set, DistType lowerBound) const { - std::unique_lock lock(element_neighbors_locks_[curNodeId]); + elem_read_mutex_t lock(element_neighbors_locks_[curNodeId]); idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); linkListSize links_num = getNodeNeighborsCount(node_links); @@ -637,7 +640,7 @@ void HNSWIndex::processCandidate_RangeSearch( tag_t *elements_tags, std::unique_ptr &results, candidatesMaxHeap &candidate_set, DistType dyn_range, double radius) const { - std::unique_lock lock(element_neighbors_locks_[curNodeId]); + elem_read_mutex_t lock(element_neighbors_locks_[curNodeId]); idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); linkListSize links_num = getNodeNeighborsCount(node_links); @@ -767,7 +770,7 @@ template void HNSWIndex::revisitNeighborConnections( size_t level, idType new_node_id, const std::pair &neighbor_data, idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - std::unique_lock &node_lock, std::unique_lock &neighbor_lock) { + elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock) { // Note - expect that node_lock and neighbor_lock are locked at that point. // Collect the existing neighbors and the new node as the neighbor's neighbors candidates. @@ -824,9 +827,9 @@ void HNSWIndex::revisitNeighborConnections( std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; + elem_write_mutex_t locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + locks[i] = elem_write_mutex_t(element_neighbors_locks_[nodes_to_update[i]]); } auto *neighbour_incoming_edges = getIncomingEdgesPtr(selected_neighbor, level); @@ -915,17 +918,17 @@ idType HNSWIndex::mutuallyConnectNewElement( for (auto &neighbor_data : selected_neighbors) { idType selected_neighbor = neighbor_data.second; // neighbor's id - std::unique_lock node_lock; - std::unique_lock neighbor_lock; + elem_write_mutex_t node_lock; + elem_write_mutex_t neighbor_lock; idType lower_id = (new_node_id < selected_neighbor) ? new_node_id : selected_neighbor; if (lower_id == new_node_id) { - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); + node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); + elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); } else { neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); + elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); } // get the updated count - this may change between iterations due to releasing the lock. @@ -1068,7 +1071,8 @@ void HNSWIndex::replaceEntryPoint() { volatile idType candidate_in_process = INVALID_ID; { // Go over the entry point's neighbors at the top level. - std::unique_lock lock(this->element_neighbors_locks_[entrypoint_node_]); + // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint itself, we can use a shred lock here, + elem_read_mutex_t lock(this->element_neighbors_locks_[entrypoint_node_]); idType *top_level_list = getNodeNeighborsAtLevel(old_entry, max_level_); auto neighbors_count = getNodeNeighborsCount(top_level_list); // Tries to set the (arbitrary) first neighbor as the entry point which is not deleted, @@ -1227,7 +1231,7 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s } changed = false; - std::unique_lock lock(element_neighbors_locks_[bestCand]); + elem_read_mutex_t lock(element_neighbors_locks_[bestCand]); idType *node_links = getNodeNeighborsAtNonBaseLevel(bestCand, level); linkListSize links_count = getNodeNeighborsCount(node_links); @@ -1265,7 +1269,7 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i for (size_t level = 0; level <= node_top_level; level++) { // Save the node neighbor's in the current level while holding its neighbors lock. std::vector neighbors_copy; - std::unique_lock element_lock(element_neighbors_locks_[node_id]); + elem_read_mutex_t element_lock(element_neighbors_locks_[node_id]); auto *neighbours = getNodeNeighborsAtLevel(node_id, level); unsigned short neighbours_count = getNodeNeighborsCount(neighbours); // Store the deleted element's neighbours. @@ -1275,7 +1279,7 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i // Go over the neighbours and collect tho ones that also points back to the removed node. for (auto neighbour_id : neighbors_copy) { // Hold the neighbor's lock while we are going over its neighbors. - std::unique_lock neighbor_lock(element_neighbors_locks_[neighbour_id]); + elem_read_mutex_t neighbor_lock(element_neighbors_locks_[neighbour_id]); auto *neighbour_neighbours = getNodeNeighborsAtLevel(neighbour_id, level); unsigned short neighbour_neighbours_count = getNodeNeighborsCount(neighbour_neighbours); for (size_t j = 0; j < neighbour_neighbours_count; j++) { @@ -1312,7 +1316,7 @@ void HNSWIndex::resizeIndexInternal(size_t new_max_elements) element_levels_.shrink_to_fit(); resizeLabelLookup(new_max_elements); visited_nodes_handler_pool.resize(new_max_elements); - vecsim_stl::vector(new_max_elements, this->allocator) + vecsim_stl::vector(new_max_elements, this->allocator) .swap(element_neighbors_locks_); // Reallocate base layer char *data_level0_memory_new = (char *)this->allocator->reallocate( @@ -1344,9 +1348,9 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( nodes_to_update.push_back(node_id); std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; + elem_write_mutex_t locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + locks[i] = elem_write_mutex_t(element_neighbors_locks_[nodes_to_update[i]]); } idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); @@ -1442,7 +1446,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t // Go over the repaired node neighbors, collect the non-deleted ones to be neighbors candidates // after the repair as well. { - std::unique_lock node_lock(element_neighbors_locks_[node_id]); + elem_read_mutex_t node_lock(element_neighbors_locks_[node_id]); idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); linkListSize node_neighbors_count = getNodeNeighborsCount(node_neighbors); for (size_t j = 0; j < node_neighbors_count; j++) { @@ -1477,7 +1481,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - std::unique_lock neighbor_lock( + elem_write_mutex_t neighbor_lock( this->element_neighbors_locks_[deleted_neighbor_id]); idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); diff --git a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h index 1523d9835..dc14d1be4 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h +++ b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h @@ -114,7 +114,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal // Take the current node out of the candidates queue and go over his neighbours. candidates.pop(); - this->index->lockNodeLinks(curr_node_id); + this->index->lockForReadNodeLinks(curr_node_id); idType *node_links = this->index->getNodeNeighborsAtLevel(curr_node_id, 0); linkListSize links_num = this->index->getNodeNeighborsCount(node_links); @@ -138,7 +138,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal candidates.emplace(candidate_dist, candidate_id); __builtin_prefetch(index->getNodeNeighborsAtLevel(candidates.top().second, 0)); } - this->index->unlockNodeLinks(curr_node_id); + this->index->unlockForReadNodeLinks(curr_node_id); } return VecSim_QueryResult_OK; } diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index d6e22adf2..3072a49cd 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -88,7 +88,7 @@ size_t EstimateInitialSize(const HNSWParams *params) { est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // Labels lookup hash table buckets. est += - sizeof(std::mutex) * params->initialCapacity + allocations_overhead; // lock per vector + sizeof(std::shared_mutex) * params->initialCapacity + allocations_overhead; // lock per vector } // Explicit allocation calls - always allocate a header. @@ -152,7 +152,7 @@ size_t EstimateElementSize(const HNSWParams *params) { // lookup hash map. size_t size_meta_data = sizeof(tag_t) + sizeof(size_t) + sizeof(size_t) + size_label_lookup_node; - size_t size_lock = sizeof(std::mutex); + size_t size_lock = sizeof(std::shared_mutex); /* Disclaimer: we are neglecting two additional factors that consume memory: * 1. The overall bucket size in labels_lookup hash table is usually higher than the number of diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 7fe109762..fee3081b0 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -415,7 +415,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { // except for the bucket count of the labels_lookup hash table that is calculated separately. size_t size_total_data_per_element = hnswIndex->size_data_per_element_; expected_mem_delta += (sizeof(tag_t) + sizeof(void *) + sizeof(size_t) + - size_total_data_per_element + sizeof(std::mutex)) * + size_total_data_per_element + sizeof(std::shared_mutex)) * block_size; expected_mem_delta += (hnswIndex->label_lookup_.bucket_count() - prev_bucket_count) * sizeof(size_t); From 2ec2951ecf9e7eee13e7e5ecd9a15dfbd98e7f15 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 11 Jul 2023 14:42:09 +0000 Subject: [PATCH 2/6] format --- src/VecSim/algorithms/hnsw/hnsw.h | 19 ++++++++----------- src/VecSim/index_factories/hnsw_factory.cpp | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 8d5f20318..40355ee04 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -171,8 +171,7 @@ class HNSWIndex : public VecSimIndexAbstract, void revisitNeighborConnections(size_t level, idType new_node_id, const std::pair &neighbor_data, idType *new_node_neighbors_list, - idType *neighbor_neighbors_list, - elem_write_mutex_t &node_lock, + idType *neighbor_neighbors_list, elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock); inline idType mutuallyConnectNewElement(idType new_node_id, candidatesMaxHeap &top_candidates, @@ -769,8 +768,8 @@ void HNSWIndex::getNeighborsByHeuristic2( template void HNSWIndex::revisitNeighborConnections( size_t level, idType new_node_id, const std::pair &neighbor_data, - idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock) { + idType *new_node_neighbors_list, idType *neighbor_neighbors_list, elem_write_mutex_t &node_lock, + elem_write_mutex_t &neighbor_lock) { // Note - expect that node_lock and neighbor_lock are locked at that point. // Collect the existing neighbors and the new node as the neighbor's neighbors candidates. @@ -923,11 +922,9 @@ idType HNSWIndex::mutuallyConnectNewElement( idType lower_id = (new_node_id < selected_neighbor) ? new_node_id : selected_neighbor; if (lower_id == new_node_id) { node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); - neighbor_lock = - elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + neighbor_lock = elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); } else { - neighbor_lock = - elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + neighbor_lock = elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); } @@ -1071,7 +1068,8 @@ void HNSWIndex::replaceEntryPoint() { volatile idType candidate_in_process = INVALID_ID; { // Go over the entry point's neighbors at the top level. - // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint itself, we can use a shred lock here, + // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint + // itself, we can use a shred lock here, elem_read_mutex_t lock(this->element_neighbors_locks_[entrypoint_node_]); idType *top_level_list = getNodeNeighborsAtLevel(old_entry, max_level_); auto neighbors_count = getNodeNeighborsCount(top_level_list); @@ -1481,8 +1479,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - elem_write_mutex_t neighbor_lock( - this->element_neighbors_locks_[deleted_neighbor_id]); + elem_write_mutex_t neighbor_lock(this->element_neighbors_locks_[deleted_neighbor_id]); idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index 3072a49cd..833c8290f 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -87,8 +87,8 @@ size_t EstimateInitialSize(const HNSWParams *params) { est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // element level est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // Labels lookup hash table buckets. - est += - sizeof(std::shared_mutex) * params->initialCapacity + allocations_overhead; // lock per vector + est += sizeof(std::shared_mutex) * params->initialCapacity + + allocations_overhead; // lock per vector } // Explicit allocation calls - always allocate a header. From 1457a5b18136377aad3101887be5d22d328bc9ce Mon Sep 17 00:00:00 2001 From: meiravgri Date: Wed, 12 Jul 2023 10:56:32 +0000 Subject: [PATCH 3/6] read lock instead of write in repair connections --- src/VecSim/algorithms/hnsw/hnsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 40355ee04..39e9d7e02 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -1479,7 +1479,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - elem_write_mutex_t neighbor_lock(this->element_neighbors_locks_[deleted_neighbor_id]); + elem_read_mutex_t neighbor_lock(this->element_neighbors_locks_[deleted_neighbor_id]); idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); From 2d4ff32739769f23d2df2be4e1d13090df13a358 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 13 Jul 2023 08:13:49 +0000 Subject: [PATCH 4/6] format --- src/VecSim/algorithms/hnsw/hnsw.h | 120 ++++++++++++++++++------------ 1 file changed, 72 insertions(+), 48 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 58cc000e7..60f733a76 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -51,8 +51,6 @@ using graphNodeType = pair; // represented as: (element_id, leve ////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// using elem_mutex_t = std::shared_mutex; -using elem_write_mutex_t = std::unique_lock; -using elem_read_mutex_t = std::shared_lock; // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph @@ -97,7 +95,7 @@ struct LevelData { struct ElementGraphData { size_t toplevel; - std::mutex neighborsGuard; + elem_mutex_t neighborsGuard; LevelData *others; LevelData level0; @@ -296,10 +294,14 @@ class HNSWIndex : public VecSimIndexAbstract, inline auto safeGetEntryPointState() const; inline void lockIndexDataGuard() const; inline void unlockIndexDataGuard() const; - inline void lockNodeLinks(idType node_id) const; - inline void unlockNodeLinks(idType node_id) const; - inline void lockNodeLinks(ElementGraphData *node_data) const; - inline void unlockNodeLinks(ElementGraphData *node_data) const; + inline void writeLockNodeLinks(idType node_id) const; + inline void writeUnlockNodeLinks(idType node_id) const; + inline void writeLockNodeLinks(ElementGraphData *node_data) const; + inline void writeUnlockNodeLinks(ElementGraphData *node_data) const; + inline void readLockNodeLinks(idType node_id) const; + inline void readUnlockNodeLinks(idType node_id) const; + inline void readLockNodeLinks(ElementGraphData *node_data) const; + inline void readUnlockNodeLinks(ElementGraphData *node_data) const; inline VisitedNodesHandler *getVisitedList() const; inline void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const; VecSimIndexInfo info() const override; @@ -505,24 +507,46 @@ void HNSWIndex::unlockIndexDataGuard() const { indexDataGuard.unlock(); } +/////////////// WRITE LOCKS ///////////////// template -void HNSWIndex::lockNodeLinks(ElementGraphData *node_data) const { +void HNSWIndex::writeLockNodeLinks(ElementGraphData *node_data) const { node_data->neighborsGuard.lock(); } template -void HNSWIndex::unlockNodeLinks(ElementGraphData *node_data) const { +void HNSWIndex::writeUnlockNodeLinks(ElementGraphData *node_data) const { node_data->neighborsGuard.unlock(); } template -void HNSWIndex::lockNodeLinks(idType node_id) const { - lockNodeLinks(getGraphDataByInternalId(node_id)); +void HNSWIndex::writeLockNodeLinks(idType node_id) const { + writeLockNodeLinks(getGraphDataByInternalId(node_id)); } template -void HNSWIndex::unlockNodeLinks(idType node_id) const { - unlockNodeLinks(getGraphDataByInternalId(node_id)); +void HNSWIndex::writeUnlockNodeLinks(idType node_id) const { + writeUnlockNodeLinks(getGraphDataByInternalId(node_id)); +} + +/////////////// READ LOCKS ///////////////// +template +void HNSWIndex::readLockNodeLinks(ElementGraphData *node_data) const { + node_data->neighborsGuard.lock_shared(); +} + +template +void HNSWIndex::readUnlockNodeLinks(ElementGraphData *node_data) const { + node_data->neighborsGuard.unlock_shared(); +} + +template +void HNSWIndex::readLockNodeLinks(idType node_id) const { + readLockNodeLinks(getGraphDataByInternalId(node_id)); +} + +template +void HNSWIndex::readUnlockNodeLinks(idType node_id) const { + readUnlockNodeLinks(getGraphDataByInternalId(node_id)); } /** @@ -582,7 +606,7 @@ void HNSWIndex::processCandidate( candidatesMaxHeap &candidate_set, DistType &lowerBound) const { ElementGraphData *cur_element = getGraphDataByInternalId(curNodeId); - lockNodeLinks(cur_element); + readLockNodeLinks(cur_element); LevelData &node_level = getLevelData(cur_element, layer); if (node_level.numLinks > 0) { @@ -656,7 +680,7 @@ void HNSWIndex::processCandidate( } } } - unlockNodeLinks(cur_element); + readUnlockNodeLinks(cur_element); } template @@ -667,7 +691,7 @@ void HNSWIndex::processCandidate_RangeSearch( candidatesMaxHeap &candidate_set, DistType dyn_range, DistType radius) const { auto *cur_element = getGraphDataByInternalId(curNodeId); - lockNodeLinks(cur_element); + readLockNodeLinks(cur_element); LevelData &node_level = getLevelData(cur_element, layer); if (node_level.numLinks > 0) { @@ -722,7 +746,7 @@ void HNSWIndex::processCandidate_RangeSearch( } } } - unlockNodeLinks(cur_element); + readUnlockNodeLinks(cur_element); } template @@ -868,8 +892,8 @@ void HNSWIndex::revisitNeighborConnections( // Acquire all relevant locks for making the updates for the selected neighbor - all its removed // neighbors, along with the neighbors itself and the cur node. // but first, we release the node and neighbors lock to avoid deadlocks. - unlockNodeLinks(new_node_id); - unlockNodeLinks(selected_neighbor); + writeUnlockNodeLinks(new_node_id); + writeUnlockNodeLinks(selected_neighbor); nodes_to_update.push_back(selected_neighbor); nodes_to_update.push_back(new_node_id); @@ -877,7 +901,7 @@ void HNSWIndex::revisitNeighborConnections( std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); for (size_t i = 0; i < nodes_to_update_count; i++) { - lockNodeLinks(nodes_to_update[i]); + writeLockNodeLinks(nodes_to_update[i]); } size_t neighbour_neighbours_idx = 0; bool update_cur_node_required = true; @@ -926,7 +950,7 @@ void HNSWIndex::revisitNeighborConnections( // Done updating the neighbor's neighbors. neighbor_level.numLinks = neighbour_neighbours_idx; for (size_t i = 0; i < nodes_to_update_count; i++) { - unlockNodeLinks(nodes_to_update[i]); + writeUnlockNodeLinks(nodes_to_update[i]); } } @@ -962,11 +986,11 @@ idType HNSWIndex::mutuallyConnectNewElement( idType selected_neighbor = neighbor_data.second; // neighbor's id auto *neighbor_graph_data = getGraphDataByInternalId(selected_neighbor); if (new_node_id < selected_neighbor) { - lockNodeLinks(new_node_level); - lockNodeLinks(neighbor_graph_data); + writeLockNodeLinks(new_node_level); + writeLockNodeLinks(neighbor_graph_data); } else { - lockNodeLinks(neighbor_graph_data); - lockNodeLinks(new_node_level); + writeLockNodeLinks(neighbor_graph_data); + writeLockNodeLinks(new_node_level); } // validations... @@ -978,15 +1002,15 @@ idType HNSWIndex::mutuallyConnectNewElement( if (new_node_level_data.numLinks == max_M_cur) { // The new node cannot add more neighbors this->log("Couldn't add all chosen neighbors upon inserting a new node"); - unlockNodeLinks(new_node_level); - unlockNodeLinks(neighbor_graph_data); + writeUnlockNodeLinks(new_node_level); + writeUnlockNodeLinks(neighbor_graph_data); break; } // If one of the two nodes has already deleted - skip the operation. if (isMarkedDeleted(new_node_id) || isMarkedDeleted(selected_neighbor)) { - unlockNodeLinks(new_node_level); - unlockNodeLinks(neighbor_graph_data); + writeUnlockNodeLinks(new_node_level); + writeUnlockNodeLinks(neighbor_graph_data); continue; } @@ -997,8 +1021,8 @@ idType HNSWIndex::mutuallyConnectNewElement( if (neighbor_level_data.numLinks < max_M_cur) { new_node_level_data.links[new_node_level_data.numLinks++] = selected_neighbor; neighbor_level_data.links[neighbor_level_data.numLinks++] = new_node_id; - unlockNodeLinks(new_node_level); - unlockNodeLinks(neighbor_graph_data); + writeUnlockNodeLinks(new_node_level); + writeUnlockNodeLinks(neighbor_graph_data); continue; } @@ -1108,7 +1132,7 @@ void HNSWIndex::replaceEntryPoint() { volatile idType candidate_in_process = INVALID_ID; // Go over the entry point's neighbors at the top level. - lockNodeLinks(old_entry_point); + readLockNodeLinks(old_entry_point); LevelData &old_ep_level = getLevelData(old_entry_point, maxLevel); // Tries to set the (arbitrary) first neighbor as the entry point which is not deleted, // if exists. @@ -1116,7 +1140,7 @@ void HNSWIndex::replaceEntryPoint() { if (!isMarkedDeleted(old_ep_level.links[i])) { if (!isInProcess(old_ep_level.links[i])) { entrypointNode = old_ep_level.links[i]; - unlockNodeLinks(old_entry_point); + readUnlockNodeLinks(old_entry_point); return; } else { // Store this candidate which is currently being inserted into the graph in @@ -1125,7 +1149,7 @@ void HNSWIndex::replaceEntryPoint() { } } } - unlockNodeLinks(old_entry_point); + readUnlockNodeLinks(old_entry_point); // If there is no neighbors in the current level, check for any vector at // this level to be the new entry point. @@ -1276,7 +1300,7 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s changed = false; auto *element = getGraphDataByInternalId(bestCand); - lockNodeLinks(element); + readLockNodeLinks(element); LevelData &node_level_data = getLevelData(element, level); for (int i = 0; i < node_level_data.numLinks; i++) { @@ -1298,7 +1322,7 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s } } } - unlockNodeLinks(element); + readUnlockNodeLinks(element); } while (changed); if (!running_query) { bestCand = bestNonDeletedCand; @@ -1314,18 +1338,18 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i for (size_t level = 0; level <= element->toplevel; level++) { // Save the node neighbor's in the current level while holding its neighbors lock. std::vector neighbors_copy; - lockNodeLinks(element); + readLockNodeLinks(element); auto &node_level_data = getLevelData(element, level); // Store the deleted element's neighbours. neighbors_copy.assign(node_level_data.links, node_level_data.links + node_level_data.numLinks); - unlockNodeLinks(element); + readUnlockNodeLinks(element); // Go over the neighbours and collect tho ones that also points back to the removed node. for (auto neighbour_id : neighbors_copy) { // Hold the neighbor's lock while we are going over its neighbors. auto *neighbor = getGraphDataByInternalId(neighbour_id); - lockNodeLinks(neighbor); + readLockNodeLinks(neighbor); LevelData &neighbour_level_data = getLevelData(neighbor, level); for (size_t j = 0; j < neighbour_level_data.numLinks; j++) { @@ -1335,16 +1359,16 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i break; } } - unlockNodeLinks(neighbor); + readUnlockNodeLinks(neighbor); } // Next, collect the rest of incoming edges (the ones that are not bidirectional) in the // current level to repair them. - lockNodeLinks(element); + readLockNodeLinks(element); for (auto incoming_edge : *node_level_data.incomingEdges) { incoming_neighbors.emplace_back(incoming_edge, (ushort)level); } - unlockNodeLinks(element); + readUnlockNodeLinks(element); } return incoming_neighbors; } @@ -1405,7 +1429,7 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); for (size_t i = 0; i < nodes_to_update_count; i++) { - lockNodeLinks(nodes_to_update[i]); + writeLockNodeLinks(nodes_to_update[i]); } LevelData &node_level = getLevelData(node_id, level); @@ -1480,7 +1504,7 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( // Done updating the node's neighbors. node_level.numLinks = node_neighbors_idx; for (size_t i = 0; i < nodes_to_update_count; i++) { - unlockNodeLinks(nodes_to_update[i]); + writeUnlockNodeLinks(nodes_to_update[i]); } } @@ -1502,7 +1526,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t // after the repair as well. const void *node_data = getDataByInternalId(node_id); auto *element = getGraphDataByInternalId(node_id); - lockNodeLinks(element); + readLockNodeLinks(element); LevelData &node_level_data = getLevelData(element, level); for (size_t j = 0; j < node_level_data.numLinks; j++) { node_orig_neighbours_set[node_level_data.links[j]] = true; @@ -1516,7 +1540,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t this->distFunc(node_data, getDataByInternalId(node_level_data.links[j]), this->dim), node_level_data.links[j]); } - unlockNodeLinks(element); + readUnlockNodeLinks(element); // If there are not deleted neighbors at that point the repair job has already been made by // another parallel job, and there is no need to repair the node anymore. @@ -1537,7 +1561,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t neighbors_to_remove.push_back(deleted_neighbor_id); auto *neighbor = getGraphDataByInternalId(deleted_neighbor_id); - lockNodeLinks(neighbor); + readLockNodeLinks(neighbor); LevelData &neighbor_level_data = getLevelData(neighbor, level); for (size_t j = 0; j < neighbor_level_data.numLinks; j++) { @@ -1554,7 +1578,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t this->dim), neighbor_level_data.links[j]); } - unlockNodeLinks(neighbor); + readUnlockNodeLinks(neighbor); } // Copy the original candidates, and run the heuristics. Afterwards, neighbors_candidates will From 36c176a2538614429a192d784666c0097fba56d4 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 16 Jul 2023 05:16:18 +0000 Subject: [PATCH 5/6] mvoed pragma padding restoreafter all graph data structures --- src/VecSim/algorithms/hnsw/hnsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 60f733a76..a238e6e40 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -75,7 +75,6 @@ struct ElementMetaData { ElementMetaData(labelType label = SIZE_MAX) noexcept : label(label), flags(IN_PROCESS) {} }; -#pragma pack() // restore default packing struct LevelData { vecsim_stl::vector *incomingEdges; @@ -115,6 +114,7 @@ struct ElementGraphData { ~ElementGraphData() = delete; // Should be destroyed using `destroyGraphData` }; +#pragma pack() // restore default packing //////////////////////////////////// HNSW index implementation //////////////////////////////////// template From f038427e0b6ba1c89081a21caeda65fb310730fd Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 16 Jul 2023 06:03:01 +0000 Subject: [PATCH 6/6] revert moving pragma --- src/VecSim/algorithms/hnsw/hnsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index a238e6e40..8358434f5 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -76,6 +76,7 @@ struct ElementMetaData { ElementMetaData(labelType label = SIZE_MAX) noexcept : label(label), flags(IN_PROCESS) {} }; +#pragma pack() // restore default packing struct LevelData { vecsim_stl::vector *incomingEdges; linkListSize numLinks; @@ -114,7 +115,6 @@ struct ElementGraphData { ~ElementGraphData() = delete; // Should be destroyed using `destroyGraphData` }; -#pragma pack() // restore default packing //////////////////////////////////// HNSW index implementation //////////////////////////////////// template