From 22ceb14b2feae169742ec1e2075824e2c06fe937 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Wed, 19 Feb 2025 14:39:54 +0800 Subject: [PATCH 1/7] support table name among column list --- sqlite-vec.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index 3cc802f0..b0012eda 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -3376,6 +3376,7 @@ static sqlite3_module vec_npy_eachModule = { #define VEC0_COLUMN_USERN_START 1 #define VEC0_COLUMN_OFFSET_DISTANCE 1 #define VEC0_COLUMN_OFFSET_K 2 +#define VEC0_COLUMN_OFFSET_TABLE_NAME 3 #define VEC0_SHADOW_INFO_NAME "\"%w\".\"%w_info\"" @@ -3645,6 +3646,17 @@ int vec0_column_k_idx(vec0_vtab *p) { VEC0_COLUMN_OFFSET_K; } +/** + * @brief Returns the index of the hidden column that is named after the vec0 table itself (declared after the hidden `distance` and `k` columns). + * + * @param p vec0 table + * @return int column index; callers add 2 to address it in the xUpdate argv array + */ +int vec0_column_table_name_idx(vec0_vtab *p) { + return VEC0_COLUMN_USERN_START + (vec0_num_defined_user_columns(p) - 1) + + VEC0_COLUMN_OFFSET_TABLE_NAME; +} + /** * Returns 1 if the given column-based index is a valid vector column, * 0 otherwise. 
@@ -4862,6 +4874,9 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, goto error; } + const char *schemaName = argv[1]; + const char *tableName = argv[2]; + sqlite3_str *createStr = sqlite3_str_new(NULL); sqlite3_str_appendall(createStr, "CREATE TABLE x("); if (pkColumnName) { @@ -4903,7 +4918,8 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, } } - sqlite3_str_appendall(createStr, " distance hidden, k hidden) "); + sqlite3_str_appendall(createStr, " distance hidden, k hidden, "); + sqlite3_str_appendf(createStr, "\"%w\" hidden) ", tableName); /* quote the name: a table called e.g. "my table" or "my-table" must still yield one valid hidden column */ if (pkColumnName) { sqlite3_str_appendall(createStr, "without rowid "); } @@ -4920,9 +4936,6 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, goto error; } - const char *schemaName = argv[1]; - const char *tableName = argv[2]; - pNew->db = db; pNew->pkIsText = pkColumnType == SQLITE_TEXT; pNew->schemaName = sqlite3_mprintf("%s", schemaName); @@ -8230,6 +8243,13 @@ int vec0Update_Insert(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, goto cleanup; } + // Cannot insert a value in the hidden "table_name" column + if (sqlite3_value_type(argv[2 + vec0_column_table_name_idx(p)]) != SQLITE_NULL) { + vtab_set_error(pVTab, "A value was provided for the hidden \"table_name\" column."); + rc = SQLITE_ERROR; + goto cleanup; + } + // Step #1: Insert/get a rowid for this row, from the _rowids table. 
rc = vec0Update_InsertRowidStep(p, argv[2 + VEC0_COLUMN_ID], &rowid); if (rc != SQLITE_OK) { From e52ac41491f9e97150572ddd50bf8c27ea16ddb6 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Mon, 10 Mar 2025 16:44:55 +0800 Subject: [PATCH 2/7] accept custom vacuum command --- sqlite-vec.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sqlite-vec.c b/sqlite-vec.c index b0012eda..5be26f08 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -8828,8 +8828,32 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { return SQLITE_OK; } +int vec0Update_SpecialInsert_Optimize(vec0_vtab *p) { + return SQLITE_OK; +} + +int vec0Update_SpecialInsert(sqlite3_vtab *pVTab, sqlite3_value *pVal) { + vec0_vtab *p = (vec0_vtab *)pVTab; + + const char *cmd = (const char *)sqlite3_value_text(pVal); + int n_bytes = sqlite3_value_bytes(pVal); + + if (!cmd) { + return SQLITE_NOMEM; + } + if (n_bytes == 8 && sqlite3_strnicmp(cmd, "optimize", 8) == 0) { + return vec0Update_SpecialInsert_Optimize(p); + } + vtab_set_error(pVTab, "Unknown vec0 command \"%s\", expected \"optimize\"", cmd); return SQLITE_ERROR; /* report why the special insert was rejected instead of a bare error code */ +} + static int vec0Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv, sqlite_int64 *pRowid) { + // Special insert + if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL && + sqlite3_value_type(argv[2 + vec0_column_table_name_idx((vec0_vtab*) pVTab)]) != SQLITE_NULL) { + return vec0Update_SpecialInsert(pVTab, argv[2 + vec0_column_table_name_idx((vec0_vtab*) pVTab)]); + } // DELETE operation if (argc == 1 && sqlite3_value_type(argv[0]) != SQLITE_NULL) { return vec0Update_Delete(pVTab, argv[0]); From 8f3d48a7f9c0524294d079b62dfbc07787f632b2 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Tue, 11 Mar 2025 11:05:12 +0800 Subject: [PATCH 3/7] add helper for copying metadata --- sqlite-vec.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/sqlite-vec.c b/sqlite-vec.c index 5be26f08..218c59cf 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -8828,6 
+8828,91 @@ int vec0Update_Update(sqlite3_vtab *pVTab, int argc, sqlite3_value **argv) { return SQLITE_OK; } +int vec0Update_SpecialInsert_OptimizeCopyMetadata(vec0_vtab *p, int metadata_column_idx, i64 src_chunk_id, i64 src_chunk_offset, i64 dst_chunk_id, i64 dst_chunk_offset) { + int rc; + struct Vec0MetadataColumnDefinition * metadata_column = &p->metadata_columns[metadata_column_idx]; + vec0_metadata_column_kind kind = metadata_column->kind; + + sqlite3_blob *srcBlob, *dstBlob; + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", src_chunk_id, 0, &srcBlob); + if (rc != SQLITE_OK) { + return rc; + } + rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", dst_chunk_id, 1, &dstBlob); + if (rc != SQLITE_OK) { + sqlite3_blob_close(srcBlob); + return rc; + } + switch (kind) { + case VEC0_METADATA_COLUMN_KIND_BOOLEAN: { + u8 srcBlock, dstBlock; + rc = sqlite3_blob_read(srcBlob, &srcBlock, sizeof(u8), (int) (src_chunk_offset / CHAR_BIT)); + if (rc != SQLITE_OK) { + goto done; + } + int value = (srcBlock >> (src_chunk_offset % CHAR_BIT)) & 1; + + rc = sqlite3_blob_read(dstBlob, &dstBlock, sizeof(u8), (int) (dst_chunk_offset / CHAR_BIT)); + if (rc != SQLITE_OK) { + goto done; + } + if (value) { + dstBlock |= 1 << (dst_chunk_offset % CHAR_BIT); + } else { + dstBlock &= ~(1 << (dst_chunk_offset % CHAR_BIT)); + } + rc = sqlite3_blob_write(dstBlob, &dstBlock, sizeof(u8), dst_chunk_offset / CHAR_BIT); + if (rc != SQLITE_OK) { + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_INTEGER: { + i64 value; + rc = sqlite3_blob_read(srcBlob, &value, sizeof(i64), src_chunk_offset * sizeof(i64)); + if (rc != SQLITE_OK) { + goto done; + } + rc = sqlite3_blob_write(dstBlob, &value, sizeof(i64), dst_chunk_offset * sizeof(i64)); + if (rc != SQLITE_OK) { + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_FLOAT: { + double value; + rc = sqlite3_blob_read(srcBlob, 
&value, sizeof(double), src_chunk_offset * sizeof(double)); + if (rc != SQLITE_OK) { + goto done; + } + rc = sqlite3_blob_write(dstBlob, &value, sizeof(double), dst_chunk_offset * sizeof(double)); + if (rc != SQLITE_OK) { + goto done; + } + break; + } + case VEC0_METADATA_COLUMN_KIND_TEXT: { + u8 view[VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH]; + rc = sqlite3_blob_read(srcBlob, view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, src_chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if (rc != SQLITE_OK) { + goto done; + } + rc = sqlite3_blob_write(dstBlob, view, VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH, dst_chunk_offset * VEC0_METADATA_TEXT_VIEW_BUFFER_LENGTH); + if (rc != SQLITE_OK) { + goto done; + } + break; + } + } +done: { /* always close BOTH blob handles, and never let a successful close hide an earlier read/write failure */ + int rcCloseSrc = sqlite3_blob_close(srcBlob); + int rcCloseDst = sqlite3_blob_close(dstBlob); + if (rc == SQLITE_OK) rc = (rcCloseSrc != SQLITE_OK) ? rcCloseSrc : rcCloseDst; /* only surface a close error when the copy itself succeeded */ + } + + return rc; +} + int vec0Update_SpecialInsert_Optimize(vec0_vtab *p) { return SQLITE_OK; } From 0fe81566dafeadd0a894f1a9b8d52f4aa5552314 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Tue, 11 Mar 2025 11:06:19 +0800 Subject: [PATCH 4/7] support custom vacuum command on vec0 tables --- sqlite-vec.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 215 insertions(+), 1 deletion(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index 218c59cf..bb7936af 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -8836,10 +8836,12 @@ int vec0Update_SpecialInsert_OptimizeCopyMetadata(vec0_vtab *p, int metadata_col sqlite3_blob *srcBlob, *dstBlob; rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowMetadataChunksNames[metadata_column_idx], "data", src_chunk_id, 0, &srcBlob); if (rc != SQLITE_OK) { + vtab_set_error(&p->base, "Failed to open %s blob", 
p->shadowMetadataChunksNames[metadata_column_idx]); sqlite3_blob_close(srcBlob); return rc; } @@ -8914,7 +8916,219 @@ int vec0Update_SpecialInsert_OptimizeCopyMetadata(vec0_vtab *p, int metadata_col } int vec0Update_SpecialInsert_Optimize(vec0_vtab *p) { - return SQLITE_OK; + sqlite3_stmt *stmt = NULL, *partition_key_stmt = NULL; + int rc; + const char *zSql; + i64 prev_max_chunk_rowid = -1; + sqlite3_value *partitionKeyValues[VEC0_MAX_PARTITION_COLUMNS]; + + // 1) get the current maximum chunk_id + zSql = sqlite3_mprintf("SELECT max(rowid) FROM " VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName); + if (!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if ((rc != SQLITE_OK)) { + rc = SQLITE_ERROR; + goto done; + } + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW || sqlite3_column_type(stmt, 0) == SQLITE_NULL) { + if (rc == SQLITE_ROW) { + // no chunks to clear + rc = SQLITE_OK; + } else { + rc = SQLITE_ERROR; + } + goto cleanup; + } + prev_max_chunk_rowid = sqlite3_column_int64(stmt, 0); + if (sqlite3_step(stmt) != SQLITE_DONE) { + rc = SQLITE_ERROR; + goto cleanup; + } + sqlite3_finalize(stmt); + + // 2) for each row get the chunk_id for its partition key (if any), if the chunk_id is less than + // the previous maximum chunk_id, a new chunk needs to be created + zSql = sqlite3_mprintf("SELECT rowid, chunk_id, chunk_offset FROM " VEC0_SHADOW_ROWIDS_NAME, + p->schemaName, p->tableName); + if (!zSql) { + rc = SQLITE_NOMEM; + goto done; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, NULL); + sqlite3_free((void *)zSql); + if (rc != SQLITE_OK) { + goto done; + } + + if (p->numPartitionColumns > 0) { + sqlite3_str * s = sqlite3_str_new(NULL); + sqlite3_str_appendall(s, "SELECT "); + for (int i = 0; i < p->numPartitionColumns; i++) { + if (i == 0) sqlite3_str_appendf(s, "partition%02d", i); + else sqlite3_str_appendf(s, ", partition%02d", i); + } + sqlite3_str_appendf(s, " FROM 
" VEC0_SHADOW_CHUNKS_NAME, p->schemaName, p->tableName); + sqlite3_str_appendall(s, " WHERE chunk_id = ?"); + zSql = sqlite3_str_finish(s); + if (!zSql) { + rc = SQLITE_NOMEM; + goto cleanup; + } + rc = sqlite3_prepare_v2(p->db, zSql, -1, &partition_key_stmt, NULL); + sqlite3_free((void *)zSql); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + + i64 rowid, chunk_id, chunk_offset; + i64 new_chunk_id, new_chunk_offset; + sqlite3_blob *blobChunksValidity = NULL; + const unsigned char *bufferChunksValidity = NULL; + void *vectorDatas[VEC0_MAX_VECTOR_COLUMNS]; + while ((rc = sqlite3_step(stmt)) == SQLITE_ROW) { + rowid = sqlite3_column_int64(stmt, 0); + chunk_id = sqlite3_column_int64(stmt, 1); + chunk_offset = sqlite3_column_int64(stmt, 2); + + // get the partition key for a row + if (p->numPartitionColumns > 0) { + sqlite3_reset(partition_key_stmt); + sqlite3_clear_bindings(partition_key_stmt); + sqlite3_bind_int64(partition_key_stmt, 1, chunk_id); + if (sqlite3_step(partition_key_stmt) != SQLITE_ROW) { + rc = SQLITE_ERROR; goto cleanup; /* rc still held SQLITE_ROW from the outer step; do not return that as the result */ + } + + for (int i = 0; i < p->numPartitionColumns; i++) { + partitionKeyValues[i] = sqlite3_column_value(partition_key_stmt, i); + } + } + + // get the latest chunk_id for a partition key + rc = vec0_get_latest_chunk_rowid(p, &new_chunk_id, partitionKeyValues); + if (rc != SQLITE_OK) { + goto cleanup; + } + + // create a new chunk if the latest chunk_id for a partition key is less than the previous maximum chunk_id + if (new_chunk_id <= prev_max_chunk_rowid) { + rc = vec0_new_chunk(p, partitionKeyValues, NULL); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + // get the vector data from all vector columns of a row + for (int i = 0; i < p->numVectorColumns; i++) { + rc = vec0_get_vector_data(p, rowid, i, &vectorDatas[i], NULL); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + + // find a valid slot in the new chunk + rc = vec0Update_InsertNextAvailableStep(p, partitionKeyValues, &new_chunk_id, &new_chunk_offset, &blobChunksValidity, 
&bufferChunksValidity); + if (rc != SQLITE_OK) { + goto cleanup; + } + + // write vector datas to the valid slot + rc = vec0Update_InsertWriteFinalStep(p, new_chunk_id, new_chunk_offset, rowid, vectorDatas, blobChunksValidity, bufferChunksValidity); + if (rc != SQLITE_OK) { + goto cleanup; + } + sqlite3_free((void *)bufferChunksValidity); /* NOTE(review): vectorDatas[i] is never released in this loop - confirm whether vec0_get_vector_data hands ownership to the caller */ + if (sqlite3_blob_close(blobChunksValidity) != SQLITE_OK) { + rc = SQLITE_ERROR; + vtab_set_error(&p->base, + VEC_INTERAL_ERROR "unknown error, blobChunksValidity could " + "not be closed, please file an issue"); + goto cleanup; + } + + // copy metadata from previous chunk to new chunk + for (int i = 0; i < p->numMetadataColumns; i++) { + rc = vec0Update_SpecialInsert_OptimizeCopyMetadata(p, i, chunk_id, chunk_offset, new_chunk_id, new_chunk_offset); + if (rc != SQLITE_OK) { + goto cleanup; + } + } + + if (p->numPartitionColumns > 0 && sqlite3_step(partition_key_stmt) != SQLITE_DONE) { + rc = SQLITE_ERROR; + goto cleanup; + } + } + if (rc != SQLITE_DONE) { + goto cleanup; + } + sqlite3_finalize(partition_key_stmt); + sqlite3_finalize(stmt); + partition_key_stmt = NULL; + stmt = NULL; + + // 3) clean up old chunks + zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_CHUNKS_NAME " WHERE chunk_id <= ?", + p->schemaName, p->tableName); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if (rc != SQLITE_OK) { + goto cleanup; + } + rc = sqlite3_bind_int64(stmt, 1, prev_max_chunk_rowid); /* capture the bind result so the check below tests it, not the stale prepare result */ + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + goto cleanup; + } + sqlite3_finalize(stmt); + + // 4) clean up old vector chunks + for (int i = 0; i < p->numVectorColumns; i++) { + zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_VECTOR_N_NAME " WHERE rowid <= ?", + p->schemaName, p->tableName, i); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if (rc != SQLITE_OK) { + goto cleanup; + } + rc = sqlite3_bind_int64(stmt, 1, prev_max_chunk_rowid); + if 
((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + goto cleanup; + } + sqlite3_finalize(stmt); + } + + // 5) clean up old metadata chunks + for (int i = 0; i < p->numMetadataColumns; i++) { + zSql = sqlite3_mprintf("DELETE FROM " VEC0_SHADOW_METADATA_N_NAME " WHERE rowid <= ?", + p->schemaName, p->tableName, i); + rc = sqlite3_prepare_v2(p->db, zSql, -1, &stmt, 0); + sqlite3_free((void *)zSql); + if (rc != SQLITE_OK) { + goto cleanup; + } + rc = sqlite3_bind_int64(stmt, 1, prev_max_chunk_rowid); + if ((rc != SQLITE_OK) || (sqlite3_step(stmt) != SQLITE_DONE)) { + rc = SQLITE_ERROR; + goto cleanup; + } + sqlite3_finalize(stmt); + } + + stmt = NULL; + rc = SQLITE_OK; + +cleanup: + sqlite3_finalize(partition_key_stmt); + sqlite3_finalize(stmt); +done: + return rc; } int vec0Update_SpecialInsert(sqlite3_vtab *pVTab, sqlite3_value *pVal) { From 5238684666d479769d9c3acce1495ecc80a5859b Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Wed, 12 Mar 2025 09:36:29 +0800 Subject: [PATCH 5/7] vector_chunks rowid and metadatachunks rowid should be integer --- sqlite-vec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlite-vec.c b/sqlite-vec.c index bb7936af..8bfdecc3 100644 --- a/sqlite-vec.c +++ b/sqlite-vec.c @@ -3417,7 +3417,7 @@ static sqlite3_module vec_npy_eachModule = { /// 1) schema, 2) original vtab table name #define VEC0_SHADOW_VECTOR_N_CREATE \ "CREATE TABLE " VEC0_SHADOW_VECTOR_N_NAME "(" \ - "rowid PRIMARY KEY," \ + "rowid INTEGER PRIMARY KEY," \ "vectors BLOB NOT NULL" \ ");" @@ -5107,7 +5107,7 @@ static int vec0_init(sqlite3 *db, void *pAux, int argc, const char *const *argv, } for (int i = 0; i < pNew->numMetadataColumns; i++) { - char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid PRIMARY KEY, data BLOB NOT NULL);", + char *zSql = sqlite3_mprintf("CREATE TABLE " VEC0_SHADOW_METADATA_N_NAME "(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL);", pNew->schemaName, pNew->tableName, 
i); if (!zSql) { goto error; From db360456f41ab439b5e73159533979d4bab812a8 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Wed, 12 Mar 2025 10:38:38 +0800 Subject: [PATCH 6/7] update snapshot --- tests/__snapshots__/test-auxiliary.ambr | 11 ++-------- tests/__snapshots__/test-general.ambr | 26 ++++++----------------- tests/__snapshots__/test-metadata.ambr | 28 ++++++++++++------------- 3 files changed, 22 insertions(+), 43 deletions(-) diff --git a/tests/__snapshots__/test-auxiliary.ambr b/tests/__snapshots__/test-auxiliary.ambr index bfe3d2c9..43f82397 100644 --- a/tests/__snapshots__/test-auxiliary.ambr +++ b/tests/__snapshots__/test-auxiliary.ambr @@ -333,13 +333,6 @@ 'rootpage': 3, 'sql': None, }), - OrderedDict({ - 'type': 'index', - 'name': 'sqlite_autoindex_v_vector_chunks00_1', - 'tbl_name': 'v_vector_chunks00', - 'rootpage': 8, - 'sql': None, - }), OrderedDict({ 'type': 'table', 'name': 'sqlite_sequence', @@ -358,7 +351,7 @@ 'type': 'table', 'name': 'v_auxiliary', 'tbl_name': 'v_auxiliary', - 'rootpage': 9, + 'rootpage': 8, 'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)', }), OrderedDict({ @@ -387,7 +380,7 @@ 'name': 'v_vector_chunks00', 'tbl_name': 'v_vector_chunks00', 'rootpage': 7, - 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid INTEGER PRIMARY KEY,vectors BLOB NOT NULL)', }), ]), }) diff --git a/tests/__snapshots__/test-general.ambr b/tests/__snapshots__/test-general.ambr index 0eac460f..177b58ba 100644 --- a/tests/__snapshots__/test-general.ambr +++ b/tests/__snapshots__/test-general.ambr @@ -33,25 +33,11 @@ 'rootpage': 3, 'sql': None, }), - OrderedDict({ - 'type': 'index', - 'name': 'sqlite_autoindex_v_metadatachunks00_1', - 'tbl_name': 'v_metadatachunks00', - 'rootpage': 10, - 'sql': None, - }), OrderedDict({ 'type': 'index', 'name': 'sqlite_autoindex_v_metadatatext00_1', 'tbl_name': 'v_metadatatext00', - 'rootpage': 12, - 'sql': 
None, - }), - OrderedDict({ - 'type': 'index', - 'name': 'sqlite_autoindex_v_vector_chunks00_1', - 'tbl_name': 'v_vector_chunks00', - 'rootpage': 8, + 'rootpage': 10, 'sql': None, }), OrderedDict({ @@ -72,7 +58,7 @@ 'type': 'table', 'name': 'v_auxiliary', 'tbl_name': 'v_auxiliary', - 'rootpage': 13, + 'rootpage': 11, 'sql': 'CREATE TABLE "v_auxiliary"( rowid integer PRIMARY KEY , value00)', }), OrderedDict({ @@ -93,14 +79,14 @@ 'type': 'table', 'name': 'v_metadatachunks00', 'tbl_name': 'v_metadatachunks00', - 'rootpage': 9, - 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 8, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatatext00', 'tbl_name': 'v_metadatatext00', - 'rootpage': 11, + 'rootpage': 9, 'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)', }), OrderedDict({ @@ -115,7 +101,7 @@ 'name': 'v_vector_chunks00', 'tbl_name': 'v_vector_chunks00', 'rootpage': 7, - 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid INTEGER PRIMARY KEY,vectors BLOB NOT NULL)', }), ]), }) diff --git a/tests/__snapshots__/test-metadata.ambr b/tests/__snapshots__/test-metadata.ambr index 12212ff0..ecca4912 100644 --- a/tests/__snapshots__/test-metadata.ambr +++ b/tests/__snapshots__/test-metadata.ambr @@ -615,14 +615,14 @@ 'type': 'table', 'name': 'v_metadatachunks00', 'tbl_name': 'v_metadatachunks00', - 'rootpage': 9, - 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 8, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatatext00', 'tbl_name': 'v_metadatatext00', - 'rootpage': 11, + 'rootpage': 9, 'sql': 'CREATE TABLE "v_metadatatext00"(rowid PRIMARY KEY, data TEXT)', }), OrderedDict({ @@ -637,7 +637,7 
@@ 'name': 'v_vector_chunks00', 'tbl_name': 'v_vector_chunks00', 'rootpage': 7, - 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid INTEGER PRIMARY KEY,vectors BLOB NOT NULL)', }), ]), }) @@ -1877,35 +1877,35 @@ 'type': 'table', 'name': 'v_metadatachunks00', 'tbl_name': 'v_metadatachunks00', - 'rootpage': 9, - 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 8, + 'sql': 'CREATE TABLE "v_metadatachunks00"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatachunks01', 'tbl_name': 'v_metadatachunks01', - 'rootpage': 11, - 'sql': 'CREATE TABLE "v_metadatachunks01"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 9, + 'sql': 'CREATE TABLE "v_metadatachunks01"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatachunks02', 'tbl_name': 'v_metadatachunks02', - 'rootpage': 13, - 'sql': 'CREATE TABLE "v_metadatachunks02"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 10, + 'sql': 'CREATE TABLE "v_metadatachunks02"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatachunks03', 'tbl_name': 'v_metadatachunks03', - 'rootpage': 15, - 'sql': 'CREATE TABLE "v_metadatachunks03"(rowid PRIMARY KEY, data BLOB NOT NULL)', + 'rootpage': 11, + 'sql': 'CREATE TABLE "v_metadatachunks03"(rowid INTEGER PRIMARY KEY, data BLOB NOT NULL)', }), OrderedDict({ 'type': 'table', 'name': 'v_metadatatext03', 'tbl_name': 'v_metadatatext03', - 'rootpage': 17, + 'rootpage': 12, 'sql': 'CREATE TABLE "v_metadatatext03"(rowid PRIMARY KEY, data TEXT)', }), OrderedDict({ @@ -1920,7 +1920,7 @@ 'name': 'v_vector_chunks00', 'tbl_name': 'v_vector_chunks00', 'rootpage': 7, - 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid PRIMARY KEY,vectors BLOB NOT NULL)', + 'sql': 'CREATE TABLE "v_vector_chunks00"(rowid INTEGER PRIMARY 
KEY,vectors BLOB NOT NULL)', }), ]), }) From 8cf82467aeddb607bae32723d3aca4bf09cf7b51 Mon Sep 17 00:00:00 2001 From: Wilbert Harriman Date: Thu, 20 Feb 2025 15:44:13 +0800 Subject: [PATCH 7/7] test vacuum --- tests/test-auxiliary.py | 20 ++++++++++++++++++++ tests/test-metadata.py | 20 ++++++++++++++++++++ tests/test-partition-keys.py | 26 ++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/tests/test-auxiliary.py b/tests/test-auxiliary.py index d1f5f568..c1f8fecb 100644 --- a/tests/test-auxiliary.py +++ b/tests/test-auxiliary.py @@ -126,6 +126,26 @@ def test_knn(db, snapshot): ) == snapshot(name="illegal KNN w/ aux") +def test_vacuum(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], +name text)" + ) + db.executemany( + "insert into v(vector, name) values (?, ?)", + [("[1]", "alex"), ("[2]", "brian"), ("[3]", "craig")], + ) + + exec(db, "delete from v where 1 = 1") + prev_page_count = exec(db, "pragma page_count")["rows"][0]["page_count"] + + db.execute("insert into v(v) values ('optimize')") + db.commit() + db.execute("vacuum") + + cur_page_count = exec(db, "pragma page_count")["rows"][0]["page_count"] + assert cur_page_count < prev_page_count + + def exec(db, sql, parameters=[]): try: rows = db.execute(sql, parameters).fetchall() diff --git a/tests/test-metadata.py b/tests/test-metadata.py index 3c2e5423..fb618a62 100644 --- a/tests/test-metadata.py +++ b/tests/test-metadata.py @@ -286,6 +286,26 @@ def test_knn(db, snapshot): ) +def test_vacuum(db, snapshot): + db.execute( + "create virtual table v using vec0(vector float[1], name text)" + ) + db.executemany( + "insert into v(vector, name) values (?, ?)", + [("[1]", "alex"), ("[2]", "brian"), ("[3]", "craig")], + ) + + exec(db, "delete from v where 1 = 1") + prev_page_count = exec(db, "pragma page_count")["rows"][0]["page_count"] + + db.execute("insert into v(v) values ('optimize')") + db.commit() + db.execute("vacuum") + + cur_page_count = exec(db, "pragma 
page_count")["rows"][0]["page_count"] + assert cur_page_count < prev_page_count + + SUPPORTS_VTAB_IN = sqlite3.sqlite_version_info[1] >= 38 diff --git a/tests/test-partition-keys.py b/tests/test-partition-keys.py index fee35600..6e9042a6 100644 --- a/tests/test-partition-keys.py +++ b/tests/test-partition-keys.py @@ -74,6 +74,32 @@ def test_updates(db, snapshot): ) +def test_vacuum(db, snapshot): + db.execute( + "create virtual table v using vec0(p text partition key, a float[1])" + ) + + db.execute( + "insert into v(rowid, p, a) values (?, ?, ?)", [1, "a", b"\x11\x11\x11\x11"] + ) + db.execute( + "insert into v(rowid, p, a) values (?, ?, ?)", [2, "a", b"\x22\x22\x22\x22"] + ) + db.execute( + "insert into v(rowid, p, a) values (?, ?, ?)", [3, "a", b"\x33\x33\x33\x33"] + ) + + exec(db, "delete from v where 1 = 1") + prev_page_count = exec(db, "pragma page_count")["rows"][0]["page_count"] + + db.execute("insert into v(v) values ('optimize')") + db.commit() + db.execute("vacuum") + + cur_page_count = exec(db, "pragma page_count")["rows"][0]["page_count"] + assert cur_page_count < prev_page_count + + class Row: def __init__(self): pass