Skip to content

Commit b632185

Browse files
vlaskyMarcus Pousette
andcommitted
Fix memory leak by clearing vectors and rowids on delete
Implements proper cleanup when deleting rows to prevent memory/storage leaks: - vec0Update_Delete_ClearRowid(): Zeros out rowid slot in chunks.rowids blob - vec0Update_Delete_ClearVectors(): Zeros out vector data in all vector_chunks Additional improvements: - Add -undefined dynamic_lookup flag for macOS builds (standard for SQLite extensions) - Add test-snapshots-update Makefile target - Add comprehensive tests to verify delete properly clears bytes - Add .venv to .gitignore Merged from upstream PR asg017#243 by marcus-pousette. Fixes issues asg017#54, asg017#178, asg017#220. Co-Authored-By: Marcus Pousette <[email protected]>
1 parent 12f4e05 commit b632185

File tree

7 files changed

+233
-16
lines changed

7 files changed

+233
-16
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ sift/
77
*.bin
88
*.out
99
venv/
10+
.venv
1011

1112
vendor/
1213
dist/

Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ endif
2424

2525
ifdef CONFIG_DARWIN
2626
LOADABLE_EXTENSION=dylib
27+
# Let unresolved SQLite symbols resolve against host at load time
28+
# This is standard for SQLite loadable extensions on macOS.
29+
CFLAGS += -undefined dynamic_lookup
2730
endif
2831

2932
ifdef CONFIG_LINUX
@@ -193,6 +196,10 @@ test-loadable: loadable
193196
test-loadable-snapshot-update: loadable
194197
$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update
195198

199+
# Update snapshots for all loadable tests (use after intentional behavior changes)
200+
test-snapshots-update: loadable
201+
$(PYTHON) -m pytest -vv tests/test-*.py --snapshot-update
202+
196203
test-loadable-watch:
197204
watchexec --exts c,py,Makefile --clear -- make test-loadable
198205

sqlite-vec.c

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8471,6 +8471,101 @@ int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
84718471
return rc;
84728472
}
84738473

8474+
// Clear the rowid slot in v_chunks.rowids for the given chunk/offset
8475+
int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id, i64 chunk_offset) {
8476+
int rc;
8477+
sqlite3_blob *blobChunksRowids = NULL;
8478+
8479+
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
8480+
chunk_id, 1, &blobChunksRowids);
8481+
if (rc != SQLITE_OK) {
8482+
vtab_set_error(&p->base, "could not open rowids blob for %s.%s.%lld",
8483+
p->schemaName, p->shadowChunksName, chunk_id);
8484+
return SQLITE_ERROR;
8485+
}
8486+
8487+
i64 expected = p->chunk_size * sizeof(i64);
8488+
i64 actual = sqlite3_blob_bytes(blobChunksRowids);
8489+
if (expected != actual) {
8490+
vtab_set_error(&p->base,
8491+
VEC_INTERAL_ERROR
8492+
"rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
8493+
p->schemaName, p->shadowChunksName, chunk_id, expected, actual);
8494+
sqlite3_blob_close(blobChunksRowids);
8495+
return SQLITE_ERROR;
8496+
}
8497+
8498+
i64 zero = 0;
8499+
rc = sqlite3_blob_write(blobChunksRowids, &zero, sizeof(i64),
8500+
chunk_offset * sizeof(i64));
8501+
int brc = sqlite3_blob_close(blobChunksRowids);
8502+
if (rc != SQLITE_OK) {
8503+
vtab_set_error(&p->base, "could not write rowids blob on %s.%s.%lld",
8504+
p->schemaName, p->shadowChunksName, chunk_id);
8505+
return rc;
8506+
}
8507+
if (brc != SQLITE_OK) {
8508+
vtab_set_error(&p->base,
8509+
"could not close rowids blob on %s.%s.%lld",
8510+
p->schemaName, p->shadowChunksName, chunk_id);
8511+
return brc;
8512+
}
8513+
return SQLITE_OK;
8514+
}
8515+
8516+
// Clear the vector bytes for each vector column at the given chunk/offset
8517+
int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id, i64 chunk_offset) {
8518+
for (int i = 0; i < p->numVectorColumns; i++) {
8519+
int rc;
8520+
sqlite3_blob *blobVectors = NULL;
8521+
8522+
rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
8523+
"vectors", chunk_id, 1, &blobVectors);
8524+
if (rc != SQLITE_OK) {
8525+
vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
8526+
p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8527+
return rc;
8528+
}
8529+
8530+
i64 expected = p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
8531+
i64 actual = sqlite3_blob_bytes(blobVectors);
8532+
if (expected != actual) {
8533+
vtab_set_error(&p->base,
8534+
VEC_INTERAL_ERROR
8535+
"vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
8536+
p->schemaName, p->shadowVectorChunksNames[i], chunk_id, expected, actual);
8537+
sqlite3_blob_close(blobVectors);
8538+
return SQLITE_ERROR;
8539+
}
8540+
8541+
size_t nbytes = vector_column_byte_size(p->vector_columns[i]);
8542+
void *zeros = sqlite3_malloc(nbytes);
8543+
if (!zeros) {
8544+
sqlite3_blob_close(blobVectors);
8545+
return SQLITE_NOMEM;
8546+
}
8547+
memset(zeros, 0, nbytes);
8548+
rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, zeros,
8549+
p->vector_columns[i].dimensions,
8550+
p->vector_columns[i].element_type);
8551+
sqlite3_free(zeros);
8552+
8553+
int brc = sqlite3_blob_close(blobVectors);
8554+
if (rc != SQLITE_OK) {
8555+
vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
8556+
p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8557+
return rc;
8558+
}
8559+
if (brc != SQLITE_OK) {
8560+
vtab_set_error(&p->base,
8561+
"Could not commit blob transaction for vectors blob for %s.%s.%lld",
8562+
p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
8563+
return brc;
8564+
}
8565+
}
8566+
return SQLITE_OK;
8567+
}
8568+
84748569
int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
84758570
int rc;
84768571
sqlite3_stmt *stmt = NULL;
@@ -8611,9 +8706,17 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {
86118706

86128707
// 3. zero out rowid in chunks.rowids
86138708
// https://github.com/asg017/sqlite-vec/issues/54
8709+
rc = vec0Update_Delete_ClearRowid(p, chunk_id, chunk_offset);
8710+
if (rc != SQLITE_OK) {
8711+
return rc;
8712+
}
86148713

86158714
// 4. zero out any data in vector chunks tables
86168715
// https://github.com/asg017/sqlite-vec/issues/54
8716+
rc = vec0Update_Delete_ClearVectors(p, chunk_id, chunk_offset);
8717+
if (rc != SQLITE_OK) {
8718+
return rc;
8719+
}
86178720

86188721
// 5. delete from _rowids table
86198722
rc = vec0Update_Delete_DeleteRowids(p, rowid);

tests/__snapshots__/test-auxiliary.ambr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
'chunk_id': 1,
138138
'size': 8,
139139
'validity': b'\x06',
140-
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
140+
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
141141
}),
142142
]),
143143
}),
@@ -163,7 +163,7 @@
163163
'rows': list([
164164
OrderedDict({
165165
'rowid': 1,
166-
'vectors': b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
166+
'vectors': b'\x00\x00\x00\x00\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
167167
}),
168168
]),
169169
}),

tests/__snapshots__/test-general.ambr

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -126,47 +126,47 @@
126126
'rows': list([
127127
OrderedDict({
128128
'schema': 'main',
129-
'name': 'v_auxiliary',
129+
'name': 'v_metadatatext00',
130130
'type': 'shadow',
131131
'ncol': 2,
132132
'wr': 0,
133133
'strict': 0,
134134
}),
135135
OrderedDict({
136136
'schema': 'main',
137-
'name': 'v_chunks',
137+
'name': 'v_metadatachunks00',
138138
'type': 'shadow',
139-
'ncol': 6,
139+
'ncol': 2,
140140
'wr': 0,
141141
'strict': 0,
142142
}),
143143
OrderedDict({
144144
'schema': 'main',
145-
'name': 'v_info',
145+
'name': 'v_rowids',
146146
'type': 'shadow',
147-
'ncol': 2,
147+
'ncol': 4,
148148
'wr': 0,
149149
'strict': 0,
150150
}),
151151
OrderedDict({
152152
'schema': 'main',
153-
'name': 'v_rowids',
153+
'name': 'v_auxiliary',
154154
'type': 'shadow',
155-
'ncol': 4,
155+
'ncol': 2,
156156
'wr': 0,
157157
'strict': 0,
158158
}),
159159
OrderedDict({
160160
'schema': 'main',
161-
'name': 'v_metadatachunks00',
161+
'name': 'v_chunks',
162162
'type': 'shadow',
163-
'ncol': 2,
163+
'ncol': 6,
164164
'wr': 0,
165165
'strict': 0,
166166
}),
167167
OrderedDict({
168168
'schema': 'main',
169-
'name': 'v_metadatatext00',
169+
'name': 'v_info',
170170
'type': 'shadow',
171171
'ncol': 2,
172172
'wr': 0,

tests/__snapshots__/test-metadata.ambr

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
'chunk_id': 1,
2929
'size': 8,
3030
'validity': b'\x02',
31-
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
31+
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
3232
}),
3333
]),
3434
}),
@@ -89,7 +89,7 @@
8989
'rows': list([
9090
OrderedDict({
9191
'rowid': 1,
92-
'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
92+
'vectors': b'\x00\x00\x00\x00""""\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
9393
}),
9494
]),
9595
}),
@@ -264,7 +264,7 @@
264264
'chunk_id': 1,
265265
'size': 8,
266266
'validity': b'\x06',
267-
'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
267+
'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
268268
}),
269269
]),
270270
}),
@@ -335,7 +335,7 @@
335335
'rows': list([
336336
OrderedDict({
337337
'rowid': 1,
338-
'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
338+
'vectors': b'\x00\x00\x00\x00""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
339339
}),
340340
]),
341341
}),

tests/test-delete-clears-bytes.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import os
2+
3+
4+
def test_delete_clears_rowid_and_vectors():
5+
try:
6+
import pysqlite3 as sqlite3 # uses bundled modern SQLite with extension loading
7+
except ImportError: # fallback if not available
8+
import sqlite3
9+
10+
db = sqlite3.connect(":memory:")
11+
db.row_factory = sqlite3.Row
12+
if hasattr(db, "enable_load_extension"):
13+
db.enable_load_extension(True)
14+
ext = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dist", "vec0"))
15+
try:
16+
# Explicit entrypoint to avoid relying on default name
17+
db.load_extension(ext, "sqlite3_vec_init")
18+
except Exception:
19+
# Some loaders accept missing suffix path without explicit entrypoint
20+
db.load_extension(ext)
21+
22+
# One vector column with 1 dimension (4 bytes per vector), chunk_size=8
23+
db.execute("create virtual table v using vec0(vector float[1], chunk_size=8)")
24+
25+
# Insert two rows with distinct raw vector bytes
26+
db.execute(
27+
"insert into v(rowid, vector) values (?, ?)",
28+
[1, b"\x11\x11\x11\x11"],
29+
)
30+
db.execute(
31+
"insert into v(rowid, vector) values (?, ?)",
32+
[2, b"\x22\x22\x22\x22"],
33+
)
34+
35+
# Sanity check pre-delete: validity has first two bits set (0b00000011)
36+
row = db.execute("select validity, rowids from v_chunks").fetchone()
37+
assert row is not None
38+
assert row[0] == b"\x03"
39+
40+
# Delete rowid=1
41+
db.execute("delete from v where rowid = 1")
42+
43+
# After delete, validity should only have bit 1 set (0b00000010)
44+
row = db.execute("select validity, rowids from v_chunks").fetchone()
45+
assert row[0] == b"\x02"
46+
47+
# Rowids BLOB: first 8 bytes (slot 0) must be zero; second (slot 1) must be rowid=2
48+
rowids = row[1]
49+
assert isinstance(rowids, (bytes, bytearray))
50+
assert len(rowids) == 8 * 8 # chunk_size * sizeof(i64)
51+
assert rowids[0:8] == b"\x00" * 8
52+
assert rowids[8:16] == b"\x02\x00\x00\x00\x00\x00\x00\x00"
53+
54+
# Vectors BLOB for the first (and only) vector column
55+
vectors_row = db.execute("select vectors from v_vector_chunks00").fetchone()
56+
vectors = vectors_row[0]
57+
# chunk_size (8) * 4 bytes per float32 = 32 bytes
58+
assert len(vectors) == 32
59+
# Slot 0 cleared to zeros, slot 1 left as inserted (0x22 0x22 0x22 0x22)
60+
assert vectors[0:4] == b"\x00\x00\x00\x00"
61+
assert vectors[4:8] == b"\x22\x22\x22\x22"
62+
63+
64+
def test_vacuum_shrinks_file(tmp_path):
65+
try:
66+
import pysqlite3 as sqlite3
67+
except ImportError:
68+
import sqlite3
69+
70+
db_path = tmp_path / "vacuum_vec.db"
71+
72+
con = sqlite3.connect(str(db_path))
73+
con.row_factory = sqlite3.Row
74+
if hasattr(con, "enable_load_extension"):
75+
con.enable_load_extension(True)
76+
ext = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dist", "vec0"))
77+
try:
78+
con.load_extension(ext)
79+
except Exception:
80+
# Some platforms require the full filename or default entrypoint; fallback already tried
81+
con.load_extension(ext)
82+
83+
# Use a larger chunk_size to inflate file size more clearly
84+
con.execute("create virtual table v using vec0(vector float[1], chunk_size=4096)")
85+
86+
# Insert a decent number of rows to grow the DB
87+
N = 10000
88+
con.executemany(
89+
"insert into v(rowid, vector) values(?, ?)",
90+
((i, b"\x11\x11\x11\x11") for i in range(1, N + 1)),
91+
)
92+
con.commit()
93+
94+
size_after_insert = os.stat(db_path).st_size
95+
assert size_after_insert > 0
96+
97+
# Drop the table to free its pages, then VACUUM to rewrite/shrink the file
98+
con.execute("drop table v")
99+
con.commit()
100+
con.execute("VACUUM")
101+
con.close()
102+
103+
size_after_vacuum = os.stat(db_path).st_size
104+
105+
# File should shrink after dropping the table and VACUUM
106+
assert size_after_vacuum < size_after_insert

0 commit comments

Comments
 (0)