Skip to content

Commit fadfd5f

Browse files
[0.8] [MOD-7297] Replace Variable Length Array on stack with heap allocation (#508)
[MOD-7297] Replace Variable Length Array on stack with heap allocation (#505) * vaseline for BM * fix to baseline * replace stack with allocation * use unique ptr * revert format batch iterator * fix lifetime * fix * fix * fix * rearrange * use ref to allocator instead of pointer * disable flow temp (cherry picked from commit ab96a8d) Co-authored-by: meiravgri <[email protected]>
1 parent 4621276 commit fadfd5f

File tree

9 files changed

+69
-33
lines changed

9 files changed

+69
-33
lines changed

.github/workflows/flow-temp.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
uses: ./.github/workflows/task-unit-test.yml
1616
with:
1717
container: ubuntu:jammy
18-
run-valgrind: false
18+
run-valgrind: true
1919
focal:
2020
uses: ./.github/workflows/task-unit-test.yml
2121
with:

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1820,9 +1820,9 @@ AddVectorCtx HNSWIndex<DataType, DistType>::storeNewElement(labelType label,
18201820
// Create the new element's graph metadata.
18211821
// We must assign manually enough memory on the stack and not just declare an `ElementGraphData`
18221822
// variable, since it has a flexible array member.
1823-
char tmpData[this->elementGraphDataSize];
1824-
memset(tmpData, 0, this->elementGraphDataSize);
1825-
ElementGraphData *cur_egd = (ElementGraphData *)tmpData;
1823+
auto tmpData = this->allocator->allocate_unique(this->elementGraphDataSize);
1824+
memset(tmpData.get(), 0, this->elementGraphDataSize);
1825+
ElementGraphData *cur_egd = (ElementGraphData *)(tmpData.get());
18261826
// Allocate memory (inside `ElementGraphData` constructor) for the links in higher levels and
18271827
// initialize this memory to zeros. The reason for doing it here is that we might mark this
18281828
// vector as deleted BEFORE we finish its indexing. In that case, we will collect the incoming

src/VecSim/algorithms/hnsw/hnsw_serializer.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -160,15 +160,15 @@ void HNSWIndex<DataType, DistType>::restoreGraph(std::ifstream &input) {
160160
unsigned int block_len = 0;
161161
readBinaryPOD(input, block_len);
162162
for (size_t j = 0; j < block_len; j++) {
163-
char cur_vec[this->dataSize];
164-
input.read(cur_vec, this->dataSize);
165-
this->vectorBlocks.back().addElement(cur_vec);
163+
auto cur_vec = this->getAllocator()->allocate_unique(this->dataSize);
164+
input.read(static_cast<char *>(cur_vec.get()), this->dataSize);
165+
this->vectorBlocks.back().addElement(cur_vec.get());
166166
}
167167
}
168168

169169
// Get graph data blocks
170170
ElementGraphData *cur_egt;
171-
char tmpData[this->elementGraphDataSize];
171+
auto tmpData = this->getAllocator()->allocate_unique(this->elementGraphDataSize);
172172
size_t toplevel = 0;
173173
for (size_t i = 0; i < num_blocks; i++) {
174174
this->graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize,
@@ -177,19 +177,20 @@ void HNSWIndex<DataType, DistType>::restoreGraph(std::ifstream &input) {
177177
readBinaryPOD(input, block_len);
178178
for (size_t j = 0; j < block_len; j++) {
179179
// Reset tmpData
180-
memset(tmpData, 0, this->elementGraphDataSize);
180+
memset(tmpData.get(), 0, this->elementGraphDataSize);
181181
// Read the current element top level
182182
readBinaryPOD(input, toplevel);
183183
// Allocate space and structs for the current element
184184
try {
185-
new (tmpData) ElementGraphData(toplevel, this->levelDataSize, this->allocator);
185+
new (tmpData.get())
186+
ElementGraphData(toplevel, this->levelDataSize, this->allocator);
186187
} catch (std::runtime_error &e) {
187188
this->log(VecSimCommonStrings::LOG_WARNING_STRING,
188189
"Error - allocating memory for new element failed due to low memory");
189190
throw e;
190191
}
191192
// Add the current element to the current block, and update cur_egt to point to it.
192-
this->graphDataBlocks.back().addElement(tmpData);
193+
this->graphDataBlocks.back().addElement(tmpData.get());
193194
cur_egt = (ElementGraphData *)this->graphDataBlocks.back().getElement(j);
194195

195196
// Restore the current element's graph data

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -504,11 +504,12 @@ void TieredHNSWIndex<DataType, DistType>::executeInsertJob(HNSWInsertJob *job) {
504504
HNSWIndex<DataType, DistType> *hnsw_index = this->getHNSWIndex();
505505
// Copy the vector blob from the flat buffer, so we can release the flat lock while we are
506506
// indexing the vector into HNSW index.
507-
DataType blob_copy[this->frontendIndex->getDim()];
508-
memcpy(blob_copy, this->frontendIndex->getDataByInternalId(job->id),
507+
auto blob_copy = this->getAllocator()->allocate_unique(this->frontendIndex->getDataSize());
508+
509+
memcpy(blob_copy.get(), this->frontendIndex->getDataByInternalId(job->id),
509510
this->frontendIndex->getDim() * sizeof(DataType));
510511

511-
this->insertVectorToHNSW<true>(hnsw_index, job->label, blob_copy);
512+
this->insertVectorToHNSW<true>(hnsw_index, job->label, blob_copy.get());
512513

513514
// Remove the vector and the insert job from the flat buffer.
514515
this->flatIndexGuard.lock();

src/VecSim/memory/vecsim_malloc.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,17 @@ void *VecSimAllocator::callocate(size_t size) {
9999
return nullptr;
100100
}
101101

102+
std::unique_ptr<void, VecSimAllocator::Deleter>
103+
VecSimAllocator::allocate_aligned_unique(size_t size, size_t alignment) {
104+
void *ptr = this->allocate_aligned(size, alignment);
105+
return {ptr, Deleter(*this)};
106+
}
107+
108+
std::unique_ptr<void, VecSimAllocator::Deleter> VecSimAllocator::allocate_unique(size_t size) {
109+
void *ptr = this->allocate(size);
110+
return {ptr, Deleter(*this)};
111+
}
112+
102113
void *VecSimAllocator::operator new(size_t size) { return vecsim_malloc(size); }
103114

104115
void *VecSimAllocator::operator new[](size_t size) { return vecsim_malloc(size); }

src/VecSim/memory/vecsim_malloc.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ struct VecSimAllocator {
2525
static size_t allocation_header_size;
2626
static VecSimMemoryFunctions memFunctions;
2727

28+
// Forward declaration of the deleter for the unique_ptr.
29+
struct Deleter;
2830
VecSimAllocator() : allocated(std::atomic_uint64_t(sizeof(VecSimAllocator))) {}
2931

3032
public:
@@ -36,6 +38,10 @@ struct VecSimAllocator {
3638
void *reallocate(void *p, size_t size);
3739
void free_allocation(void *p);
3840

41+
// Allocations for scope-life-time memory.
42+
std::unique_ptr<void, Deleter> allocate_aligned_unique(size_t size, size_t alignment);
43+
std::unique_ptr<void, Deleter> allocate_unique(size_t size);
44+
3945
void *operator new(size_t size);
4046
void *operator new[](size_t size);
4147
void operator delete(void *p, size_t size);
@@ -55,8 +61,14 @@ struct VecSimAllocator {
5561
static size_t getAllocationOverheadSize() { return allocation_header_size; }
5662

5763
private:
58-
// Retrive the original requested allocation size. Required for remalloc.
64+
// Retrieve the original requested allocation size. Required for remalloc.
5965
inline size_t getPointerAllocationSize(void *p) { return *(((size_t *)p) - 1); }
66+
67+
struct Deleter {
68+
VecSimAllocator &allocator;
69+
explicit constexpr Deleter(VecSimAllocator &allocator) : allocator(allocator) {}
70+
void operator()(void *ptr) const { allocator.free_allocation(ptr); }
71+
};
6072
};
6173

6274
/**

src/VecSim/spaces/normalize/normalize_naive.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "VecSim/types/bfloat16.h"
1010
#include "VecSim/types/float16.h"
1111
#include <cmath>
12+
#include <vector>
1213

1314
using bfloat16 = vecsim_types::bfloat16;
1415
using float16 = vecsim_types::float16;
@@ -35,7 +36,7 @@ template <bool is_little>
3536
static inline void bfloat16_normalizeVector(void *vec, const size_t dim) {
3637
bfloat16 *input_vector = (bfloat16 *)vec;
3738

38-
float f32_tmp[dim];
39+
std::vector<float> f32_tmp(dim);
3940

4041
float sum = 0;
4142

@@ -55,7 +56,7 @@ static inline void bfloat16_normalizeVector(void *vec, const size_t dim) {
5556
static inline void float16_normalizeVector(void *vec, const size_t dim) {
5657
float16 *input_vector = (float16 *)vec;
5758

58-
float f32_tmp[dim];
59+
std::vector<float> f32_tmp(dim);
5960

6061
float sum = 0;
6162

src/VecSim/vec_sim_index.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -206,33 +206,37 @@ struct VecSimIndexAbstract : public VecSimIndexInterface {
206206

207207
protected:
208208
virtual int addVectorWrapper(const void *blob, labelType label, void *auxiliaryCtx) override {
209-
char PORTABLE_ALIGN aligned_mem[this->dataSize];
210-
const void *processed_blob = processBlob(blob, aligned_mem);
209+
auto aligned_mem =
210+
this->getAllocator()->allocate_aligned_unique(this->dataSize, this->alignment);
211+
const void *processed_blob = processBlob(blob, aligned_mem.get());
211212

212213
return this->addVector(processed_blob, label, auxiliaryCtx);
213214
}
214215

215216
virtual VecSimQueryReply *topKQueryWrapper(const void *queryBlob, size_t k,
216217
VecSimQueryParams *queryParams) const override {
217-
char PORTABLE_ALIGN aligned_mem[this->dataSize];
218-
const void *processed_blob = processBlob(queryBlob, aligned_mem);
218+
auto aligned_mem =
219+
this->getAllocator()->allocate_aligned_unique(this->dataSize, this->alignment);
220+
const void *processed_blob = processBlob(queryBlob, aligned_mem.get());
219221

220222
return this->topKQuery(processed_blob, k, queryParams);
221223
}
222224

223225
virtual VecSimQueryReply *rangeQueryWrapper(const void *queryBlob, double radius,
224226
VecSimQueryParams *queryParams,
225227
VecSimQueryReply_Order order) const override {
226-
char PORTABLE_ALIGN aligned_mem[this->dataSize];
227-
const void *processed_blob = processBlob(queryBlob, aligned_mem);
228+
auto aligned_mem =
229+
this->getAllocator()->allocate_aligned_unique(this->dataSize, this->alignment);
230+
const void *processed_blob = processBlob(queryBlob, aligned_mem.get());
228231

229232
return this->rangeQuery(processed_blob, radius, queryParams, order);
230233
}
231234

232235
virtual VecSimBatchIterator *
233236
newBatchIteratorWrapper(const void *queryBlob, VecSimQueryParams *queryParams) const override {
234-
char PORTABLE_ALIGN aligned_mem[this->dataSize];
235-
const void *processed_blob = processBlob(queryBlob, aligned_mem);
237+
auto aligned_mem =
238+
this->getAllocator()->allocate_aligned_unique(this->dataSize, this->alignment);
239+
const void *processed_blob = processBlob(queryBlob, aligned_mem.get());
236240

237241
return this->newBatchIterator(processed_blob, queryParams);
238242
}

src/VecSim/vec_sim_tiered_index.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,31 +109,37 @@ class VecSimTieredIndex : public VecSimIndexInterface {
109109

110110
private:
111111
virtual int addVectorWrapper(const void *blob, labelType label, void *auxiliaryCtx) override {
112-
char PORTABLE_ALIGN aligned_mem[this->backendIndex->getDataSize()];
113-
const void *processed_blob = this->backendIndex->processBlob(blob, aligned_mem);
112+
auto aligned_mem = this->getAllocator()->allocate_aligned_unique(
113+
this->backendIndex->getDataSize(), this->backendIndex->getAlignment());
114+
const void *processed_blob = this->backendIndex->processBlob(blob, aligned_mem.get());
115+
114116
return this->addVector(processed_blob, label, auxiliaryCtx);
115117
}
116118

117119
virtual VecSimQueryReply *topKQueryWrapper(const void *queryBlob, size_t k,
118120
VecSimQueryParams *queryParams) const override {
119-
char PORTABLE_ALIGN aligned_mem[this->backendIndex->getDataSize()];
120-
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem);
121+
auto aligned_mem = this->getAllocator()->allocate_aligned_unique(
122+
this->backendIndex->getDataSize(), this->backendIndex->getAlignment());
123+
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem.get());
124+
121125
return this->topKQuery(processed_blob, k, queryParams);
122126
}
123127

124128
virtual VecSimQueryReply *rangeQueryWrapper(const void *queryBlob, double radius,
125129
VecSimQueryParams *queryParams,
126130
VecSimQueryReply_Order order) const override {
127-
char PORTABLE_ALIGN aligned_mem[this->backendIndex->getDataSize()];
128-
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem);
131+
auto aligned_mem = this->getAllocator()->allocate_aligned_unique(
132+
this->backendIndex->getDataSize(), this->backendIndex->getAlignment());
133+
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem.get());
129134

130135
return this->rangeQuery(processed_blob, radius, queryParams, order);
131136
}
132137

133138
virtual VecSimBatchIterator *
134139
newBatchIteratorWrapper(const void *queryBlob, VecSimQueryParams *queryParams) const override {
135-
char PORTABLE_ALIGN aligned_mem[this->backendIndex->getDataSize()];
136-
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem);
140+
auto aligned_mem = this->getAllocator()->allocate_aligned_unique(
141+
this->backendIndex->getDataSize(), this->backendIndex->getAlignment());
142+
const void *processed_blob = this->backendIndex->processBlob(queryBlob, aligned_mem.get());
137143

138144
return this->newBatchIterator(processed_blob, queryParams);
139145
}

0 commit comments

Comments
 (0)