diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 049d4cd..d93fd75 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -3,6 +3,7 @@ on: push: branches: - main + - batching permissions: contents: read jobs: @@ -10,8 +11,6 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v4 - with: - submodules: recursive - uses: actions/setup-python@v5 with: python-version: "3.12" @@ -25,8 +24,6 @@ jobs: runs-on: macos-12 steps: - uses: actions/checkout@v4 - with: - submodules: recursive - run: make loadable - run: /usr/local/opt/python@3/libexec/bin/python -m pip install --break-system-packages pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python - uses: actions/upload-artifact@v4 @@ -37,8 +34,6 @@ jobs: runs-on: macos-14 steps: - uses: actions/checkout@v4 - with: - submodules: recursive - run: make loadable - run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python - uses: actions/upload-artifact@v4 @@ -49,8 +44,6 @@ jobs: runs-on: windows-2019 steps: - uses: actions/checkout@v4 - with: - submodules: recursive - uses: ilammy/msvc-dev-cmd@v1 - uses: actions/setup-python@v5 with: @@ -63,3 +56,26 @@ jobs: with: name: sqlite-lembed-windows-x86_64-extension path: dist/*.dll + build-cosmopolitan: + if: false + runs-on: macos-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - run: | + mkdir $HOME/cosmo + curl -L -o cosmocc-$COSMO_VERSION.zip https://github.com/jart/cosmopolitan/releases/download/$COSMO_VERSION/cosmocc-$COSMO_VERSION.zip + unzip cosmocc-$COSMO_VERSION.zip -d $HOME/cosmo + env: + COSMO_VERSION: "3.9.6" + - run: make sqlite-lembed.h + - run: make cli CC=$HOME/cosmo/bin/cosmocc CXX=$HOME/cosmo/bin/cosmoc++ AR=$HOME/cosmo/bin/cosmoar OMIT_SIMD=1 + - run: tar -czvf sqlite-lembed-$(cat VERSION)-cli-cosmopolitan.tar.gz dist/sqlite3 + - 
run: gh release upload ${{ github.ref_name }} sqlite-lembed-$(cat VERSION)-cli-cosmopolitan.tar.gz + env: + GH_TOKEN: ${{ github.token }} + - uses: actions/upload-artifact@v4 + with: + name: sqlite-lembed-cosmopolitan + path: dist/* diff --git a/.gitignore b/.gitignore index cc72133..e1e2d23 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ ggml-metal.metal News*.json sqlite-lembed.h dist/ + +*.db diff --git a/.gitmodules b/.gitmodules index 7edf097..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "vendor/llama.cpp"] - path = vendor/llama.cpp - url = https://github.com/ggerganov/llama.cpp.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fdef01..97e36bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,42 +4,77 @@ project(SqliteEmbed C CXX) set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD_REQUIRED ON) -set(LLAMA_METAL OFF) -set(LLAMA_STATIC ON) -set(LLAMA_OPENMP OFF) - -set(LLAMA_CPP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp") -set(SQLITE_AMALGAMATION_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/sqlite") - +include(FetchContent) include(ExternalProject) -set(SQLITE_VERSION 3450300) + +# sqlite amalgamation, for up-to-date headers and sqlite3 CLI +set(SQLITE_VERSION 3470000) set(SQLITE_YEAR 2024) -set(SQLITE_URL https://www.sqlite.org/${SQLITE_YEAR}/sqlite-amalgamation-${SQLITE_VERSION}.zip) -ExternalProject_Add(sqlite_amalgamation - URL ${SQLITE_URL} - DOWNLOAD_DIR ${CMAKE_BINARY_DIR}/downloads - SOURCE_DIR ${SQLITE_AMALGAMATION_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" -) +set(SQLITE_URL) +FetchContent_Declare( + sqlite_amalgamation + URL https://www.sqlite.org/${SQLITE_YEAR}/sqlite-amalgamation-${SQLITE_VERSION}.zip + ) +FetchContent_MakeAvailable(sqlite_amalgamation) -add_subdirectory(${LLAMA_CPP_DIR} ${CMAKE_BINARY_DIR}/llama.cpp) +# llama.cpp +set(LLAMA_METAL OFF) +set(LLAMA_STATIC ON) +set(LLAMA_OPENMP OFF) -include_directories(${SQLITE_AMALGAMATION_DIR}) +FetchContent_Declare( + 
llama_cpp + GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git + GIT_TAG b3091 +) +FetchContent_MakeAvailable(llama_cpp) +# sqlite-lembed loadable add_library(sqlite_lembed SHARED sqlite-lembed.c) -add_dependencies(sqlite_lembed sqlite_amalgamation) target_link_libraries(sqlite_lembed ggml_static llama) target_include_directories(sqlite_lembed PRIVATE ${LLAMA_CPP_DIR}) +target_include_directories(sqlite_lembed PRIVATE ${sqlite_amalgamation_SOURCE_DIR}) set_target_properties(sqlite_lembed PROPERTIES PREFIX "") set_target_properties(sqlite_lembed PROPERTIES OUTPUT_NAME "lembed0") +# sqlite-lembed static add_library(sqlite_lembed_static STATIC sqlite-lembed.c) -add_dependencies(sqlite_lembed_static sqlite_amalgamation) target_link_libraries(sqlite_lembed_static ggml_static llama) target_include_directories(sqlite_lembed_static PRIVATE ${LLAMA_CPP_DIR}) +target_include_directories(sqlite_lembed_static PRIVATE ${sqlite_amalgamation_SOURCE_DIR}) target_compile_definitions(sqlite_lembed_static PRIVATE SQLITE_CORE) set_target_properties(sqlite_lembed_static PROPERTIES OUTPUT_NAME "sqlite_lembed0") + + +# sqlite-vec, for a better sqlite3 CLI +set(SQLITE_VEC_VERSION 0.1.6) +FetchContent_Declare( + sqlite_vec + URL https://github.com/asg017/sqlite-vec/releases/download/v${SQLITE_VEC_VERSION}/sqlite-vec-${SQLITE_VEC_VERSION}-amalgamation.tar.gz +) +FetchContent_MakeAvailable(sqlite_vec) + + +# sqlite3 CLI, with sqlite-lembed and sqlite-vec +add_executable( + sqlite3_cli + ${sqlite_amalgamation_SOURCE_DIR}/shell.c + ${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c + ${sqlite_vec_SOURCE_DIR}/sqlite-vec.c + core_init.c +) +add_dependencies(sqlite3_cli sqlite_lembed_static) +target_link_libraries(sqlite3_cli sqlite_lembed_static) +target_include_directories( + sqlite3_cli PRIVATE + ${sqlite_amalgamation_SOURCE_DIR} + ${sqlite_vec_SOURCE_DIR} +) +target_compile_definitions( + sqlite3_cli PUBLIC + SQLITE_EXTRA_INIT=core_init + SQLITE_CORE +) +set_target_properties(sqlite3_cli 
PROPERTIES OUTPUT_NAME "sqlite3") diff --git a/Makefile b/Makefile index b11db35..cc09b58 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ COMMIT=$(shell git rev-parse HEAD) VERSION=$(shell cat VERSION) DATE=$(shell date +'%FT%TZ%z') -LLAMA_CMAKE_FLAGS=-DLLAMA_OPENMP=OFF +LLAMA_CMAKE_FLAGS+=-DLLAMA_OPENMP=OFF ifndef CC CC=gcc endif @@ -55,10 +55,11 @@ $(prefix): TARGET_LOADABLE=$(prefix)/lembed0.$(LOADABLE_EXTENSION) TARGET_STATIC=$(prefix)/libsqlite_lembed0.a TARGET_STATIC_H=$(prefix)/sqlite-lembed.h +TARGET_CLI=$(prefix)/sqlite3 loadable: $(TARGET_LOADABLE) static: $(TARGET_STATIC) - +cli: $(TARGET_CLI) BUILD_DIR=$(prefix)/.build @@ -85,6 +86,16 @@ $(TARGET_LOADABLE): sqlite-lembed.c sqlite-lembed.h $(BUILD_DIR) $(prefix) ls $(BUILD_DIR) cp $(BUILT_LOADABLE_PATH) $@ +$(TARGET_STATIC): sqlite-lembed.c sqlite-lembed.h $(BUILD_DIR) $(prefix) + cmake --build $(BUILD_DIR) -t sqlite_lembed_static $(EXTRA_CMAKE_BUILD) + ls $(BUILD_DIR) + cp $(BUILT_LOADABLE_PATH) $@ + +$(TARGET_CLI): sqlite-lembed.c sqlite-lembed.h $(BUILD_DIR) $(prefix) + cmake --build $(BUILD_DIR) -t sqlite3_cli $(EXTRA_CMAKE_BUILD) + ls $(BUILD_DIR) + cp $(BUILD_DIR)/sqlite3 $@ + sqlite-lembed.h: sqlite-lembed.h.tmpl VERSION VERSION=$(shell cat VERSION) \ @@ -100,8 +111,18 @@ $(MODELS_DIR): $(BUILD_DIR) $(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf: $(MODELS_DIR) curl -L -o $@ https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/all-MiniLM-L6-v2/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf -test-loadable: $(TARGET_LOADABLE) $(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf - $(PYTHON) -m pytest tests/test-loadable.py +$(MODELS_DIR)/mxbai-embed-xsmall-v1-q8_0.gguf: $(MODELS_DIR) + curl -L -o $@ https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1/resolve/main/gguf/mxbai-embed-xsmall-v1-q8_0.gguf + +$(MODELS_DIR)/nomic-embed-text-v1.5.Q2_K.gguf: $(MODELS_DIR) + curl -L -o $@ https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q2_K.gguf + 
+models: $(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf $(MODELS_DIR)/mxbai-embed-xsmall-v1-q8_0.gguf $(MODELS_DIR)/nomic-embed-text-v1.5.Q2_K.gguf + +test-loadable: $(TARGET_LOADABLE) models + $(PYTHON) -m pytest tests/test-loadable.py -s -x -vv +test-loadable-watch: + watchexec -w sqlite-lembed.c -w tests/test-loadable.py -w Makefile --clear -- make test-loadable FORMAT_FILES=sqlite-lembed.c diff --git a/README.md b/README.md index 2d856ef..7fc141f 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ To load it into `sqlite-lembed`, register it with the `temp.lembed_models` table ```sql .load ./lembed0 -INSERT INTO temp.lembed_models(name, model) - select 'all-MiniLM-L6-v2', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf'); +insert into temp.lembed_models(name, model) + values ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf')); select lembed( 'all-MiniLM-L6-v2', @@ -54,7 +54,7 @@ create virtual table vec_articles using vec0( ); insert into vec_articles(rowid, headline_embeddings) - select rowid, lembed('all-MiniLM-L6-v2', headline) + select rowid, lembed( headline) from articles; ``` @@ -71,7 +71,7 @@ with matches as ( rowid, distance from vec_articles - where headline_embeddings match lembed('all-MiniLM-L6-v2', :query) + where headline_embeddings match lembed(:query) order by distance limit 3 ) diff --git a/core_init.c b/core_init.c new file mode 100644 index 0000000..f8ee09f --- /dev/null +++ b/core_init.c @@ -0,0 +1,12 @@ +#include "sqlite3.h" +#include "sqlite-vec.h" +#include "sqlite-lembed.h" +#include +int core_init(const char *dummy) { + int rc; + rc = sqlite3_auto_extension((void *)sqlite3_vec_init); + if(rc == SQLITE_OK) { + rc = sqlite3_auto_extension((void *)sqlite3_lembed_init); + } + return rc; +} diff --git a/sqlite-lembed.c b/sqlite-lembed.c index 479a554..59ac0cc 100644 --- a/sqlite-lembed.c +++ b/sqlite-lembed.c @@ -13,10 +13,12 @@ SQLITE_EXTENSION_INIT1 #define UNUSED_PARAMETER(X) (void)(X) #endif 
+#define SQLITE_VEC_FLOAT32_SUBTYPE 223 + void dummy_log(enum ggml_log_level level, const char *text, void *user_data) {} static void normalize(float *vec, float *out, int n) { - float norm = 0; + double norm = 0; for (int i = 0; i < n; i++) { norm += vec[i] * vec[i]; } @@ -52,25 +54,40 @@ int tokenize(struct llama_model *model, const char *input, size_t input_length, return SQLITE_OK; } -int embed_single(struct llama_model *model, struct llama_context *context, + +int embed_single(struct llama_context *context, const char *input, size_t input_length, /** Output float embedding */ float **out_embedding, /** Output embedding length (n dimensions) */ - int *out_dimensions) { - int n_batch = 512; + int *out_dimensions, + char ** errmsg) { + struct llama_model * model = (struct llama_model *) llama_get_model(context); + int n_ctx_train = llama_n_ctx_train(model); int n_ctx = llama_n_ctx(context); + int dimensions = llama_n_embd(model); + float *output_embedding = sqlite3_malloc(sizeof(float) * dimensions); + if(!output_embedding) { + return SQLITE_NOMEM; + } + llama_token *tokens; int token_count; int rc = tokenize(model, input, input_length, &token_count, &tokens); if(rc != SQLITE_OK) { // TODO error message + *errmsg = sqlite3_mprintf("Could not tokenize input."); return rc; } - struct llama_batch batch = llama_batch_init(n_batch, 0, 1); + if(token_count > n_ctx) { + *errmsg = sqlite3_mprintf("Input too long, provided %lld tokens, but model has context size of %lld", (int64_t) token_count, (int64_t) n_ctx); + return SQLITE_ERROR; + } + + struct llama_batch batch = llama_batch_init(n_ctx, 0, 1); int seq_id = 0; // llama_batch_add(batch, tokens, 0, ) @@ -85,18 +102,12 @@ int embed_single(struct llama_model *model, struct llama_context *context, batch.n_tokens++; } - int dimensions = llama_n_embd(model); - float *output_embedding = sqlite3_malloc(sizeof(float) * dimensions); - if(!output_embedding) { - llama_batch_free(batch); - return SQLITE_NOMEM; - } - 
llama_kv_cache_clear(context); // KV not needed for embeddings? rc = llama_decode(context, batch); if(rc != 0) { sqlite3_free(output_embedding); llama_batch_free(batch); + *errmsg = sqlite3_mprintf("Could not decode batch"); return SQLITE_ERROR; } @@ -110,6 +121,7 @@ if(!source_embedding) { sqlite3_free(output_embedding); llama_batch_free(batch); + *errmsg = sqlite3_mprintf("Could not find embedding"); return SQLITE_ERROR; } @@ -149,33 +161,40 @@ struct lembed_model_options { static char *POINTER_NAME_MODEL = "lembed_model"; static char *POINTER_NAME_MODEL_OPTIONS = "lembed_model_options"; -static void lembed_model_size(sqlite3_context *context, int argc, - sqlite3_value **argv) { - struct llama_model *model = - sqlite3_value_pointer(argv[0], POINTER_NAME_MODEL); - if (!model) - return; - sqlite3_result_int64(context, llama_model_size(model)); -} - static void lembed_model_options_(sqlite3_context *context, int argc, sqlite3_value **argv) { - assert(argc >= 0); - assert(argc % 2 == 0); + + if(argc % 2 != 0) { + sqlite3_result_error(context, "an even number of arguments are required in lembed_model_options, key-value pairs", -1); + return; + } lembed_model_options *o = sqlite3_malloc(sizeof(lembed_model_options)); - assert(o); + if(!o) { + sqlite3_result_error_nomem(context); + return; + } memset(o, 0, sizeof(*o)); for (int i = 0; i < argc; i += 2) { sqlite3_value *key = argv[i]; sqlite3_value *value = argv[i + 1]; - assert(sqlite3_value_type(key) == SQLITE_TEXT); + if(sqlite3_value_type(key) != SQLITE_TEXT) { + char * errmsg = sqlite3_mprintf("Expected string key at index %d", i); + sqlite3_result_error(context, errmsg, -1); + sqlite3_free(errmsg); + sqlite3_free(o); + return; + } const char *k = (const char *)sqlite3_value_text(key); if (sqlite3_stricmp(k, "n_gpu_layers") == 0) { o->n_gpu_layers = sqlite3_value_int(value); o->defined[0] = 1; } else { - abort(); + char * errmsg = 
sqlite3_mprintf("Unknown model option '%s'", k); + sqlite3_result_error(context, errmsg, -1); + sqlite3_free(errmsg); + sqlite3_free(o); + return; } } sqlite3_result_pointer(context, o, POINTER_NAME_MODEL_OPTIONS, sqlite3_free); @@ -194,25 +213,43 @@ static char *POINTER_NAME_CONTEXT_OPTIONS = "lembed_context_options"; static void lembed_context_options_(sqlite3_context *context, int argc, sqlite3_value **argv) { - assert(argc >= 0); - assert(argc % 2 == 0); + if(argc % 2 != 0) { + sqlite3_result_error(context, "an even number of arguments are required in lembed_context_options, key-value pairs", -1); + return; + } lembed_context_options *o = sqlite3_malloc(sizeof(lembed_context_options)); - assert(o); + if(!o) { + sqlite3_result_error_nomem(context); + return; + } memset(o, 0, sizeof(*o)); for (int i = 0; i < argc; i += 2) { sqlite3_value *key = argv[i]; sqlite3_value *value = argv[i + 1]; - assert(sqlite3_value_type(key) == SQLITE_TEXT); + if(sqlite3_value_type(key) != SQLITE_TEXT) { + char * errmsg = sqlite3_mprintf("Expected string key at index %d", i); + sqlite3_result_error(context, errmsg, -1); + sqlite3_free(errmsg); + return; + } const char *k = (const char *)sqlite3_value_text(key); if (sqlite3_stricmp("seed", k) == 0) { sqlite3_int64 v = sqlite3_value_int64(value); - assert(v > 0); + if(v < 0) { + sqlite3_result_error(context, "Expected positive value for seed", -1); + sqlite3_free(o); + return; + } o->seed = v; o->defined[0] = 1; } else if (sqlite3_stricmp("n_ctx", k) == 0) { sqlite3_int64 v = sqlite3_value_int64(value); - assert(v > 0); + if(v < 0) { + sqlite3_result_error(context, "Expected positive value for n_ctx", -1); + sqlite3_free(o); + return; + } o->n_ctx = v; o->defined[1] = 1; } else if (sqlite3_stricmp("rope_scaling_type", k) == 0) { @@ -302,27 +339,54 @@ static void lembed(sqlite3_context *context, int argc, sqlite3_value **argv) { int dimensions; float *embedding; - rc = embed_single(model, ctx, input, input_len, &embedding, 
&dimensions); + char * errmsg = NULL; + rc = embed_single(ctx, input, input_len, &embedding, &dimensions, &errmsg); if(rc != SQLITE_OK) { - sqlite3_result_error(context, "Error generating embedding", -1); + sqlite3_result_error(context, sqlite3_mprintf("Error generating embedding: %z", errmsg), -1); return; } sqlite3_result_blob(context, embedding, sizeof(float) * dimensions, sqlite3_free); - sqlite3_result_subtype(context, 223); // TODO define + sqlite3_result_subtype(context, SQLITE_VEC_FLOAT32_SUBTYPE); } static void lembed_tokenize_json(sqlite3_context *context, int argc, sqlite3_value **argv) { + int rc; struct llama_model *model; - int rc = api_model_from_name((struct Api *)sqlite3_user_data(context), + struct llama_context *ctx; + const char *input; + sqlite3_int64 input_len; + + if(argc == 1) { + input = (const char *)sqlite3_value_text(argv[0]); + input_len = sqlite3_value_bytes(argv[0]); + rc = api_model_from_name((struct Api *)sqlite3_user_data(context), "default", strlen("default"), &model, &ctx); + if(rc != SQLITE_OK) { + sqlite3_result_error(context, "No default model has been registered yet with lembed_models", -1); + return; + } + }else { + input = (const char *)sqlite3_value_text(argv[1]); + input_len = sqlite3_value_bytes(argv[1]); + rc = api_model_from_name((struct Api *)sqlite3_user_data(context), (const char *)sqlite3_value_text(argv[0]), - sqlite3_value_bytes(argv[0]), &model, NULL); - const char *input = (const char *)sqlite3_value_text(argv[1]); - sqlite3_int64 input_len = sqlite3_value_bytes(argv[1]); + sqlite3_value_bytes(argv[0]), &model, &ctx); + + if(rc != SQLITE_OK) { + char * zSql = sqlite3_mprintf("Unknown model name '%s'. 
Was it registered with lembed_models?", sqlite3_value_text(argv[0])); + sqlite3_result_error(context, zSql, -1); + sqlite3_free(zSql); + return; + } + } + int token_count; llama_token *tokens; rc = tokenize(model, input, input_len, &token_count, &tokens); - assert(rc == SQLITE_OK); + if(rc != SQLITE_OK) { + sqlite3_result_error(context, "Failed to tokenize input", -1); + return; + } sqlite3_str *s = sqlite3_str_new(NULL); sqlite3_str_appendchar(s, 1, '['); @@ -334,8 +398,11 @@ static void lembed_tokenize_json(sqlite3_context *context, int argc, } sqlite3_str_appendchar(s, 1, ']'); char *result = sqlite3_str_finish(s); - assert(result); - sqlite3_result_text(context, result, -1, sqlite3_free); + if(!result) { + sqlite3_result_error_nomem(context); + }else { + sqlite3_result_text(context, result, -1, sqlite3_free); + } } static void lembed_token_score(sqlite3_context *context, int argc, @@ -374,6 +441,15 @@ static void ggml_test(sqlite3_context *context, int argc, sqlite3_result_int64(context, ggml_cpu_has_avx()); } + +void lembed_vtab_set_error(sqlite3_vtab *pVTab, const char *zFormat, ...) 
{ + va_list args; + sqlite3_free(pVTab->zErrMsg); + va_start(args, zFormat); + pVTab->zErrMsg = sqlite3_vmprintf(zFormat, args); + va_end(args); +} + #pragma region lembed_models() table function typedef struct lembed_models_vtab lembed_models_vtab; @@ -398,9 +474,13 @@ static int lembed_modelsConnect(sqlite3 *db, void *pAux, int argc, } #define LEMBED_MODELS_NAME 0 #define LEMBED_MODELS_MODEL 1 -#define LEMBED_MODELS_MODEL_OPTIONS 2 -#define LEMBED_MODELS_CONTEXT_OPTIONS 3 - rc = sqlite3_declare_vtab(db, "CREATE TABLE x(name, model, model_options " +#define LEMBED_MODELS_SIZE 2 +#define LEMBED_MODELS_DIMENSIONS 3 +#define LEMBED_MODELS_N_CTX 4 +#define LEMBED_MODELS_POOLING_TYPE 5 +#define LEMBED_MODELS_MODEL_OPTIONS 6 +#define LEMBED_MODELS_CONTEXT_OPTIONS 7 + rc = sqlite3_declare_vtab(db, "CREATE TABLE x(name, model, size, dimensions, n_ctx, pooling_type, model_options " "hidden, context_options hidden)"); if (rc == SQLITE_OK) { pNew = sqlite3_malloc(sizeof(*pNew)); @@ -431,8 +511,13 @@ static int lembed_modelsUpdate(sqlite3_vtab *pVTab, int argc, // INSERT operation else if (argc > 1 && sqlite3_value_type(argv[0]) == SQLITE_NULL) { sqlite3_value **columnValues = &argv[2]; - const char *key = - (const char *)sqlite3_value_text(columnValues[LEMBED_MODELS_NAME]); + const char *key; + if(sqlite3_value_type(columnValues[LEMBED_MODELS_NAME]) == SQLITE_NULL) { + key = "default"; + }else { + key = (const char *)sqlite3_value_text(columnValues[LEMBED_MODELS_NAME]); + } + int idx = -1; for (int i = 0; i < MAX_MODELS; i++) { if (!p->api->models[i].name) { @@ -444,9 +529,18 @@ static int lembed_modelsUpdate(sqlite3_vtab *pVTab, int argc, if (idx < 0) abort(); - const char *modelPath = sqlite3_value_pointer( - columnValues[LEMBED_MODELS_MODEL], POINTER_NAME_MODEL_PATH); - assert(modelPath); + + const char *modelPath; + if(sqlite3_value_subtype(columnValues[LEMBED_MODELS_MODEL]) == POINTER_SUBTYPE) { + modelPath = sqlite3_value_pointer(columnValues[LEMBED_MODELS_MODEL], 
POINTER_NAME_MODEL_PATH); + } + else if (sqlite3_value_type(columnValues[LEMBED_MODELS_MODEL]) == SQLITE_TEXT) { + modelPath = sqlite3_value_text(columnValues[LEMBED_MODELS_MODEL]); + } + if(!modelPath) { + lembed_vtab_set_error(pVTab, "Could not resolve model path"); + return SQLITE_ERROR; + } lembed_model_options *modelOptions = NULL; if (sqlite3_value_subtype(columnValues[LEMBED_MODELS_MODEL_OPTIONS]) == @@ -478,6 +572,7 @@ static int lembed_modelsUpdate(sqlite3_vtab *pVTab, int argc, struct llama_context *ctx; struct llama_context_params cparams = llama_context_default_params(); cparams.embeddings = 1; + //cparams.n_ubatch = cparams.n_batch = 4096; if (contextOptions) { if (contextOptions->defined[0]) { cparams.seed = contextOptions->seed; @@ -583,6 +678,37 @@ static int lembed_modelsColumn(sqlite3_vtab_cursor *cur, sqlite3_result_text(context, p->api->models[pCur->iRowid].name, -1, SQLITE_TRANSIENT); break; + case LEMBED_MODELS_SIZE: + sqlite3_result_int64(context, llama_model_size(p->api->models[pCur->iRowid].model)); + break; + case LEMBED_MODELS_DIMENSIONS: + sqlite3_result_int64(context, llama_n_embd(p->api->models[pCur->iRowid].model)); + break; + case LEMBED_MODELS_N_CTX: + sqlite3_result_int64(context, llama_n_ctx(p->api->models[pCur->iRowid].context)); + break; + case LEMBED_MODELS_POOLING_TYPE: { + switch(llama_pooling_type(p->api->models[pCur->iRowid].context)) { + case LLAMA_POOLING_TYPE_NONE: { + sqlite3_result_text(context, "none", -1, SQLITE_STATIC); + break; + } + case LLAMA_POOLING_TYPE_MEAN: { + sqlite3_result_text(context, "mean", -1, SQLITE_STATIC); + break; + } + case LLAMA_POOLING_TYPE_CLS: { + sqlite3_result_text(context, "cls", -1, SQLITE_STATIC); + break; + } + case LLAMA_POOLING_TYPE_UNSPECIFIED: { + sqlite3_result_text(context, "unspecified", -1, SQLITE_STATIC); + break; + } + } + break; + } + case LEMBED_MODELS_MODEL: sqlite3_result_pointer(context, p->api->models[pCur->iRowid].model, POINTER_NAME_MODEL, NULL); @@ -618,74 +744,164 @@ 
static sqlite3_module lembed_modelsModule = { /* xShadowName */ 0}; #pragma endregion -#pragma region lembed_chunks() table function +#pragma region lembed_batch + + +struct Array { + size_t element_size; + size_t length; + size_t capacity; + void *z; +}; + +/** + * @brief Initial an array with the given element size and capacity. + * + * @param array + * @param element_size + * @param init_capacity + * @return SQLITE_OK on success, error code on failure. Only error is + * SQLITE_NOMEM + */ +int lembed_array_init(struct Array *array, size_t element_size, size_t init_capacity) { + int sz = element_size * init_capacity; + void *z = sqlite3_malloc(sz); + if (!z) { + return SQLITE_NOMEM; + } + memset(z, 0, sz); + + array->element_size = element_size; + array->length = 0; + array->capacity = init_capacity; + array->z = z; + return SQLITE_OK; +} -typedef struct lembed_chunks_vtab lembed_chunks_vtab; -struct lembed_chunks_vtab { +int lembed_array_append(struct Array *array, const void *element) { + if (array->length == array->capacity) { + size_t new_capacity = array->capacity * 2 + 100; + void *z = sqlite3_realloc64(array->z, array->element_size * new_capacity); + if (z) { + array->capacity = new_capacity; + array->z = z; + } else { + return SQLITE_NOMEM; + } + } + memcpy(&((unsigned char *)array->z)[array->length * array->element_size], + element, array->element_size); + array->length++; + return SQLITE_OK; +} + +void lembed_array_cleanup(struct Array *array) { + if (!array) + return; + array->element_size = 0; + array->length = 0; + array->capacity = 0; + sqlite3_free(array->z); + array->z = NULL; +} + +typedef struct lembed_batch_vtab lembed_batch_vtab; +struct lembed_batch_vtab { sqlite3_vtab base; - struct Api *api; + sqlite3 * db; + struct Api * api; }; -typedef struct lembed_chunks_cursor lembed_chunks_cursor; -struct lembed_chunks_cursor { +typedef struct lembed_batch_cursor lembed_batch_cursor; +struct lembed_batch_cursor { sqlite3_vtab_cursor base; + struct Api 
* api; + struct llama_context *lctx; sqlite3_int64 iRowid; - int32_t chunks_count; - char **chunks; + sqlite3_stmt * stmt; + int dimensions; + int eof; + int stmtRc; + + + int batchIdx; + int batchSize; + struct Array contentsArray; + struct Array contentLengthsArray; + float * embeddings; }; -static int lembed_chunksConnect(sqlite3 *db, void *pAux, int argc, - const char *const *argv, sqlite3_vtab **ppVtab, - char **pzErr) { - lembed_chunks_vtab *pNew; + +static int lembed_batchConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + lembed_batch_vtab *pNew; int rc; -#define lembed_chunks_CONTENTS 0 -#define lembed_chunks_TOKEN_COUNT 1 -#define lembed_chunks_SOURCE 2 -#define lembed_chunks_CHUNK_SIZE 3 - rc = sqlite3_declare_vtab(db, "CREATE TABLE x(contents, token_count, source " - "hidden, chunk_size hidden)"); - if (rc == SQLITE_OK) { - pNew = sqlite3_malloc(sizeof(*pNew)); - *ppVtab = (sqlite3_vtab *)pNew; - if (pNew == 0) - return SQLITE_NOMEM; + + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(contents,embedding, model hidden, input hidden)" + ); +#define LEMBED_BATCH_CONTENTS 0 +#define LEMBED_BATCH_EMBEDDING 1 +#define LEMBED_BATCH_MODEL 2 +#define LEMBED_BATCH_INPUT 3 + if( rc==SQLITE_OK ){ + pNew = sqlite3_malloc( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; memset(pNew, 0, sizeof(*pNew)); - pNew->api = pAux; } + rc = sqlite3_open(":memory:", &pNew->db); + pNew->api = pAux; return rc; } -static int lembed_chunksDisconnect(sqlite3_vtab *pVtab) { - lembed_chunks_vtab *p = (lembed_chunks_vtab *)pVtab; +static int lembed_batchDisconnect(sqlite3_vtab *pVtab){ + lembed_batch_vtab *p = (lembed_batch_vtab*)pVtab; + sqlite3_close(p->db); sqlite3_free(p); return SQLITE_OK; } -static int lembed_chunksOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { - lembed_chunks_cursor *pCur; - pCur = sqlite3_malloc(sizeof(*pCur)); - if (pCur == 0) - return 
SQLITE_NOMEM; +static int lembed_batchOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + lembed_batch_cursor *pCur; + pCur = sqlite3_malloc( sizeof(*pCur) ); + if( pCur==0 ) return SQLITE_NOMEM; memset(pCur, 0, sizeof(*pCur)); *ppCursor = &pCur->base; - return SQLITE_OK; + pCur->api = ( (lembed_batch_vtab *) p)->api; + int rc = sqlite3_prepare_v2( + ( (lembed_batch_vtab *) p)->db, + "select json_extract(value, '$.contents') from json_each(?)", + -1, + &pCur->stmt, + NULL + ); + assert(rc == SQLITE_OK); + return rc; } -static int lembed_chunksClose(sqlite3_vtab_cursor *cur) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)cur; +static int lembed_batchClose(sqlite3_vtab_cursor *cur){ + lembed_batch_cursor *pCur = (lembed_batch_cursor*)cur; + sqlite3_finalize(pCur->stmt); sqlite3_free(pCur); return SQLITE_OK; } -static int lembed_chunksBestIndex(sqlite3_vtab *pVTab, - sqlite3_index_info *pIdxInfo) { +static int lembed_batchBestIndex( + sqlite3_vtab *pVTab, + sqlite3_index_info *pIdxInfo +){ int hasSource = 0; - int idxChunkSize = -1; + for (int i = 0; i < pIdxInfo->nConstraint; i++) { const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; switch (pCons->iColumn) { - case lembed_chunks_SOURCE: { + case LEMBED_BATCH_MODEL: { if (!hasSource && !pCons->usable || pCons->op != SQLITE_INDEX_CONSTRAINT_EQ) return SQLITE_CONSTRAINT; @@ -694,8 +910,6 @@ static int lembed_chunksBestIndex(sqlite3_vtab *pVTab, pIdxInfo->aConstraintUsage[i].omit = 1; break; } - case lembed_chunks_CHUNK_SIZE: { - } } } if (!hasSource) { @@ -703,134 +917,232 @@ static int lembed_chunksBestIndex(sqlite3_vtab *pVTab, return SQLITE_ERROR; } - pIdxInfo->idxNum = 1; pIdxInfo->estimatedCost = (double)10; pIdxInfo->estimatedRows = 10; return SQLITE_OK; } -static int lembed_chunksFilter(sqlite3_vtab_cursor *pVtabCursor, int idxNum, - const char *idxStr, int argc, - sqlite3_value **argv) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)pVtabCursor; - struct Api *api = 
((lembed_chunks_vtab *)pVtabCursor->pVtab)->api; - struct llama_model *model; - int rc = api_model_from_name(api, (const char *)sqlite3_value_text(argv[0]), - sqlite3_value_bytes(argv[0]), &model, NULL); - pCur->iRowid = 0; +// SQLITE_ROW: embed some, stmt has more +// SQLITE_DONE: done after this chunk +// else: error +int embed_batch( + lembed_batch_cursor *pCur + ) { + uint32_t n_batch = llama_n_ctx(pCur->lctx); + struct llama_batch batch = llama_batch_init(n_batch, 0, 1); + int nprocessed = 0; + int rc; - char *input = (char *)sqlite3_value_text(argv[1]); - sqlite3_int64 input_len = sqlite3_value_bytes(argv[1]); - int32_t chunk_size = 5; // sqlite3_value_int(argv[1]); - int32_t overlap = 0; // argc > 2 ? sqlite3_value_int(argv[2]) : 0; + while(1) { + if(pCur->stmtRc == SQLITE_DONE) { + pCur->eof = 1; + break; + } + assert(pCur->stmtRc == SQLITE_ROW); - int token_count; - llama_token *tokens; - rc = tokenize(model, input, input_len, &token_count, &tokens); - assert(rc == SQLITE_OK); + char * s = (char *) sqlite3_column_text(pCur->stmt, 0); + int len = sqlite3_column_bytes(pCur->stmt, 0); - char *ptr = input; - int nchunks = ceil(1.0 * token_count / chunk_size); - pCur->chunks_count = nchunks; - pCur->chunks = sqlite3_malloc(sizeof(char *) * nchunks); - assert(pCur->chunks); - - for (int i = 0; i < nchunks; i++) { - sqlite3_str *str_chunk = sqlite3_str_new(NULL); - assert(str_chunk); - - for (int j = 0; j < chunk_size; j++) { - int32_t token = tokens[i * chunk_size + j]; - int32_t piece_len_neg = - llama_token_to_piece(model, token, NULL, 0, false); - // printf("%d\n", piece_len_neg); - // assert(piece_len_neg < 0); - int32_t piece_len = abs(piece_len_neg); - // include prefix space? 
- // assert(piece_len > 1); - if (!piece_len) - continue; + int input_token_count_estimate = llama_tokenize(llama_get_model(pCur->lctx), s, len, NULL, 0, true, true); + assert(input_token_count_estimate < 0); + llama_token *tokens = sqlite3_malloc(sizeof(llama_token) * abs(input_token_count_estimate)); + assert(tokens); - char *piece = sqlite3_malloc(piece_len); - assert(piece); - llama_token_to_piece(model, token, piece, piece_len, false); - // printf("'%.*s' %d ", piece_len, piece, tokens[i*chunk_size + j]); + int input_token_count = llama_tokenize(llama_get_model(pCur->lctx), s, len, tokens, abs(input_token_count_estimate), true, true); + assert(input_token_count == abs(input_token_count_estimate)); - char *begin = ptr; - while (*ptr != piece[piece_len > 1 ? 1 : 0]) { - ptr++; - } - sqlite3_str_append(str_chunk, begin, ptr - begin + piece_len); - ptr += piece_len; + if (batch.n_tokens + input_token_count > n_batch) { + assert(nprocessed>0); + sqlite3_free(tokens); + break; + } - sqlite3_free(piece); + for (size_t i = 0; i < input_token_count; i++) { + batch.token [batch.n_tokens] = tokens[i]; + batch.pos [batch.n_tokens] = i; + batch.n_seq_id[batch.n_tokens] = 1; + batch.seq_id[batch.n_tokens][0] = nprocessed; + batch.logits [batch.n_tokens] = i == (input_token_count - 1); + batch.n_tokens++; } + sqlite3_free(tokens); + nprocessed += 1; + char * zCopy = sqlite3_mprintf("%.*s", len, s); + assert(zCopy); + lembed_array_append(&pCur->contentsArray, &zCopy) == SQLITE_OK;//assert(); + lembed_array_append(&pCur->contentLengthsArray, &len) == SQLITE_OK;//assert(); + pCur->stmtRc = sqlite3_step(pCur->stmt); + } + if(nprocessed==0) { + pCur->batchSize = 0; + pCur->batchIdx = 0; + return SQLITE_DONE; + } + printf("nprocessed=%d\n", nprocessed); + + float * embeddings = sqlite3_malloc(pCur->dimensions * sizeof(float) * nprocessed); + assert(embeddings); + memset(embeddings, 0, pCur->dimensions * sizeof(float) * nprocessed); - char *chunk = sqlite3_str_finish(str_chunk); - 
assert(chunk); - pCur->chunks[i] = chunk; + llama_kv_cache_clear(pCur->lctx); + rc = llama_decode(pCur->lctx, batch); + assert(rc >= 0 ); + for (int i = 0; i < batch.n_tokens; i++) { + if (!batch.logits[i]) { + continue; + } + + float * embd = llama_get_embeddings_seq(pCur->lctx, batch.seq_id[i][0]); + assert(embd); + float * out = embeddings + batch.seq_id[i][0] * pCur->dimensions; + normalize(embd, out, pCur->dimensions); } - return SQLITE_OK; + llama_batch_free(batch); + pCur->embeddings = embeddings; + pCur->batchSize = nprocessed; + pCur->batchIdx = 0; + return SQLITE_ROW; } +static int lembed_batchFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + int rc; + lembed_batch_cursor *pCur = (lembed_batch_cursor *)pVtabCursor; + sqlite3_reset(pCur->stmt); + sqlite3_clear_bindings(pCur->stmt); + sqlite3_bind_text(pCur->stmt, 1, sqlite3_value_text(argv[0]), sqlite3_value_bytes(argv[0]), SQLITE_TRANSIENT); + pCur->stmtRc = sqlite3_step(pCur->stmt); + assert(pCur->stmtRc == SQLITE_ROW || pCur->stmtRc == SQLITE_DONE); -static int lembed_chunksRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)cur; - *pRowid = pCur->iRowid; + struct llama_model *model; + rc = api_model_from_name(pCur->api, "default", strlen("default"), &model, &pCur->lctx); + if(rc != SQLITE_OK) { + return SQLITE_ERROR; + } + pCur->dimensions = llama_n_embd(model); + for(int i = 0; i < pCur->batchSize; i++) { + sqlite3_free(((char **)pCur->contentsArray.z)[i]); + } + lembed_array_cleanup(&pCur->contentsArray); + lembed_array_cleanup(&pCur->contentLengthsArray); + if(pCur->embeddings) { + sqlite3_free(pCur->embeddings); + pCur->embeddings = NULL; + } + rc = lembed_array_init(&pCur->contentsArray, sizeof(char *), 32); + assert(rc == SQLITE_OK); + rc = lembed_array_init(&pCur->contentLengthsArray, sizeof(int), 32); + assert(rc == SQLITE_OK); + pCur->iRowid = 0; + pCur->eof = 0; + + rc 
= embed_batch(pCur); + assert(rc == SQLITE_ROW || rc == SQLITE_DONE); return SQLITE_OK; } -static int lembed_chunksNext(sqlite3_vtab_cursor *cur) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)cur; +static int lembed_batchEof(sqlite3_vtab_cursor *cur){ + lembed_batch_cursor *pCur = (lembed_batch_cursor*)cur; + return (pCur->batchIdx >= pCur->batchSize) && pCur->eof; +} + + +static int lembed_batchNext(sqlite3_vtab_cursor *cur){ + lembed_batch_cursor *pCur = (lembed_batch_cursor*)cur; pCur->iRowid++; + pCur->batchIdx++; + if(pCur->batchIdx >= pCur->batchSize) { + int rc; + for(int i = 0; i < pCur->batchSize; i++) { + sqlite3_free(((char **)pCur->contentsArray.z)[i]); + } + lembed_array_cleanup(&pCur->contentsArray); + lembed_array_cleanup(&pCur->contentLengthsArray); + if(pCur->embeddings) { + sqlite3_free(pCur->embeddings); + pCur->embeddings = NULL; + } + rc = lembed_array_init(&pCur->contentsArray, sizeof(char *), 32); + assert(rc == SQLITE_OK); + rc = lembed_array_init(&pCur->contentLengthsArray, sizeof(int), 32); + assert(rc == SQLITE_OK); + rc = embed_batch(pCur); + assert(rc == SQLITE_ROW || rc == SQLITE_DONE); + } return SQLITE_OK; } -static int lembed_chunksEof(sqlite3_vtab_cursor *cur) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)cur; - return pCur->iRowid >= pCur->chunks_count; +static int lembed_batchRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + lembed_batch_cursor *pCur = (lembed_batch_cursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; } -static int lembed_chunksColumn(sqlite3_vtab_cursor *cur, - sqlite3_context *context, int i) { - lembed_chunks_cursor *pCur = (lembed_chunks_cursor *)cur; - switch (i) { - case lembed_chunks_CONTENTS: - sqlite3_result_text(context, pCur->chunks[pCur->iRowid], -1, SQLITE_STATIC); - break; - case lembed_chunks_SOURCE: - // TODO - sqlite3_result_null(context); - break; + +static int lembed_batchColumn( + sqlite3_vtab_cursor *cur, + sqlite3_context *context, + int i +){ + 
lembed_batch_cursor *pCur = (lembed_batch_cursor*)cur; + switch( i ){ + case LEMBED_BATCH_CONTENTS: + sqlite3_result_text( + context, + ((char **)pCur->contentsArray.z)[pCur->batchIdx], + ((int *) pCur->contentLengthsArray.z)[pCur->batchIdx], + SQLITE_TRANSIENT + ); + break; + case LEMBED_BATCH_EMBEDDING: + sqlite3_result_blob( + context, + pCur->embeddings + (pCur->dimensions * pCur->batchIdx), + sizeof(float) * pCur->dimensions, + SQLITE_TRANSIENT + ); + sqlite3_result_subtype(context, SQLITE_VEC_FLOAT32_SUBTYPE); + break; + default: + sqlite3_result_null(context); } return SQLITE_OK; } -static sqlite3_module lembed_chunksModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ lembed_chunksConnect, - /* xBestIndex */ lembed_chunksBestIndex, - /* xDisconnect */ lembed_chunksDisconnect, - /* xDestroy */ 0, - /* xOpen */ lembed_chunksOpen, - /* xClose */ lembed_chunksClose, - /* xFilter */ lembed_chunksFilter, - /* xNext */ lembed_chunksNext, - /* xEof */ lembed_chunksEof, - /* xColumn */ lembed_chunksColumn, - /* xRowid */ lembed_chunksRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0}; +/* +** This following structure defines all the methods for the +** virtual table. 
+*/ +static sqlite3_module lembed_batchModule = { + /* iVersion */ 3, + /* xCreate */ 0, + /* xConnect */ lembed_batchConnect, + /* xBestIndex */ lembed_batchBestIndex, + /* xDisconnect */ lembed_batchDisconnect, + /* xDestroy */ 0, + /* xOpen */ lembed_batchOpen, + /* xClose */ lembed_batchClose, + /* xFilter */ lembed_batchFilter, + /* xNext */ lembed_batchNext, + /* xEof */ lembed_batchEof, + /* xColumn */ lembed_batchColumn, + /* xRowid */ lembed_batchRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0, + /* xIntegrity */ 0 +}; #pragma endregion #ifndef SQLITE_SUBTYPE @@ -860,7 +1172,9 @@ __declspec(dllexport) llama_log_set(dummy_log, NULL); struct Api *a = sqlite3_malloc(sizeof(struct Api)); - assert(a); + if(!a) { + return SQLITE_NOMEM; + } memset(a, 0, sizeof(*a)); int rc = SQLITE_OK; @@ -895,10 +1209,10 @@ __declspec(dllexport) // clang-format off {"lembed", lembed, 1}, {"lembed", lembed, 2}, + {"lembed_tokenize_json", lembed_tokenize_json, 1}, {"lembed_tokenize_json", lembed_tokenize_json, 2}, {"lembed_token_score", lembed_token_score, 2}, {"lembed_token_to_piece", lembed_token_to_piece_, 2}, - {"lembed_model_size", lembed_model_size, 1}, {"lembed_model_from_file", lembed_model_from_file, 1}, {"lembed_model_options", lembed_model_options_, -1}, {"lembed_context_options", lembed_context_options_, -1}, @@ -915,7 +1229,7 @@ __declspec(dllexport) sqlite3_create_function_v2(db, "_lembed_api", 0, 0, a, _noop, NULL, NULL, api_free); - sqlite3_create_module_v2(db, "lembed_chunks", &lembed_chunksModule, a, NULL); sqlite3_create_module_v2(db, "lembed_models", &lembed_modelsModule, a, NULL); + sqlite3_create_module_v2(db, "lembed_batch", &lembed_batchModule, a, NULL); return SQLITE_OK; } diff --git a/test.sql b/test.sql index 20dc3a2..e841be1 100644 --- a/test.sql +++ b/test.sql @@ -1,5 +1,5 
@@ -.load ./dist/lembed0 -.load ../sqlite-vec/dist/vec0 +--.load ./dist/lembed0 +--.load ../sqlite-vec/dist/vec0 .mode box .header on @@ -8,12 +8,124 @@ .timer on .echo on -select lembed_version(), lembed_debug(); +select sqlite_version(), lembed_version(), vec_version(); INSERT INTO temp.lembed_models(name, model) - select 'all-MiniLM-L6-v2', lembed_model_from_file('models/all-MiniLM-L6-v2-44eb4044.gguf'); + select 'default', 'dist/.models/mxbai-embed-xsmall-v1-q8_0.gguf'; + +create table articles as + select + column1 as headline, + random() % 100 as random + from (VALUES + ('Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft'), + ('The jury has been selected in Hunter Biden''s gun trial'), + ('Larry Allen, a Super Bowl champion and famed Dallas Cowboy, has died at age 52'), + ('After saying Charlotte, a lone stingray, was pregnant, aquarium now says she''s sick'), + ('An Epoch Times executive is facing money laundering charge'), + ('Hassan Nasrallah’s killing transforms an already deadly regional conflict'), + ('Who was Hassan Nasrallah, the Hezbollah leader killed by Israel?'), + ('What is Hezbollah, the militia fighting Israel in Lebanon?'), + ('Netanyahu defies calls for a cease-fire at the U.N., as Israel strikes Lebanon'), + ('Death toll from Hurricane Helene mounts as aftermath assessment begins'), + ('5 things to know from this week’s big report on cannabis'), + ('VP debates may alter a close race’s dynamic even when they don''t predict the winner'), + ('SpaceX launches ISS-bound crew that hopes to bring home 2 stuck astronauts'), + ('Why the price of eggs is on the rise again'), + ('A guide to your weekend viewing and reading'), + ('At the border in Arizona, Harris lays out a plan to get tough on fentanyl'), + ('A new kind of drug for schizophrenia promises fewer side effects'), + ('Meet the astronauts preparing to travel farther from Earth than any human before'), + ('‘SNL’ has always taken on politics. 
Here’s what works — and why'), + ('Golden-age rappers make a digital-age leap — and survive'), + ('Why Russia''s broadcaster RT turned to covertly funding American pro-Trump influencers'), + ('Read the indictment: NYC Mayor Eric Adams charged with bribery, fraud, foreign donations'), + ('Justice Department sues Alabama, claiming it purged voters too close to the election'), + ('Exactly 66 years ago, another Hurricane Helene rocked the Carolinas'), + ('A meteorologist in Atlanta rescued a woman from Helene floodwaters on camera') + ); + +select + *, + contents, + vec_to_json(vec_slice(embedding, 0, 8)) +from lembed_batch( + ( + select json_group_array( + json_object( + 'id', rowid, + 'contents', headline, + 'random', random + ) + ) from articles + ) +); + + +.exit +select * from articles; + +.timer on +select headline, length(lembed( headline)) from articles; + +select + rowid, + contents, + --length(embedding), + vec_to_json(vec_slice(embedding, 0, 8)) +from lembed_batch( + ( + select json_group_array( + json_object( + 'id', rowid, + 'contents', headline + ) + ) from articles + ) +); + +select + rowid, + headline, + vec_to_json(vec_slice(lembed(headline), 0, 8)) +from articles; + +.exit + +select + rowid, + contents, + --length(embedding), + vec_to_json(vec_slice(embedding, 0, 8)), + vec_to_json(vec_slice(lembed(contents), 0, 8)) + +from lembed_batch( + ( + '[ + {"contents": "Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft"} + ]' + ) +); +select + rowid, + contents, + --length(embedding), + vec_to_json(vec_slice(embedding, 0, 8)), + vec_to_json(vec_slice(lembed(contents), 0, 8)) + +from lembed_batch( + ( + '[ + {"contents": "Shohei Ohtani''s ex-interpreter pleads guilty to charges related to gambling and theft"}, + {"contents": "The jury has been selected in Hunter Biden''s gun trial"} + ]' + ) +); + + +.exit select vec_length(lembed('all-MiniLM-L6-v2', 'hello')) as embedding; diff --git a/tests/test-loadable.py 
b/tests/test-loadable.py index bf1166c..b47ddbe 100644 --- a/tests/test-loadable.py +++ b/tests/test-loadable.py @@ -8,6 +8,8 @@ EXT_PATH = "./dist/lembed0" MODEL1_PATH = "./dist/.models/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf" +MODEL2_PATH = "./dist/.models/mxbai-embed-xsmall-v1-q8_0.gguf" +MODEL3_PATH = "./dist/.models/nomic-embed-text-v1.5.Q2_K.gguf" def connect(ext, path=":memory:", extra_entrypoint=None): @@ -71,10 +73,11 @@ def _raises(message, error=sqlite3.OperationalError): "lembed_token_score", "lembed_token_to_piece", "lembed_tokenize_json", + "lembed_tokenize_json", "lembed_version", ] MODULES = [ - "lembed_chunks", + "lembed_batch", "lembed_models", ] @@ -110,13 +113,14 @@ def test_lembed_debug(): def test_lembed(): + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + db.execute( "insert into temp.lembed_models(name, model) values (?, lembed_model_from_file(?))", ["aaa", MODEL1_PATH], ) - lembed = lambda *args: db.execute( - "select lembed({})".format(spread_args(args)), args - ).fetchone()[0] a = lembed("aaa", "alex garcia") assert len(a) == (384 * 4) assert struct.unpack("1f", a[0:4])[0] == pytest.approx( @@ -128,6 +132,36 @@ def test_lembed(): ): lembed("aaaaaaaaa", "alex garcia") +def test_lembed_multiple(): + db = connect(EXT_PATH) + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + + db.execute( + "insert into temp.lembed_models(name, model) values (?, ?), (?, ?), (?, ?)", + ["aaa", MODEL1_PATH, "bbb", MODEL2_PATH, "ccc", MODEL3_PATH], + ) + a = lembed("aaa", "alex garcia") + b = lembed("bbb", "alex garcia") + c = lembed("ccc", "alex garcia") + assert len(a) == (384 * 4) + assert len(b) == (384 * 4) + assert len(c) == (768 * 4) + + assert execute_all(db, "select * from lembed_models") == [ + {"name": "aaa", "model": None, "dimensions": 384, "n_ctx": 512, "pooling_type": "none"}, + {"name": "bbb", "model": None, "dimensions": 384, 
"n_ctx": 512, "pooling_type": "mean"}, + {"name": "ccc", "model": None, "dimensions": 768, "n_ctx": 512, "pooling_type": "mean"}, + ] + + +def test_lembed_default(): + db = connect(EXT_PATH) + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + with _raises("No default model has been registered yet with lembed_models"): lembed("alex garcia") @@ -141,6 +175,68 @@ def test_lembed(): -0.09205757826566696, rel=1e-2 ) + # test 2: try with NULL name + db = connect(EXT_PATH) + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + + with _raises("No default model has been registered yet with lembed_models"): + lembed("alex garcia") + + db.execute( + "insert into temp.lembed_models(model) values (lembed_model_from_file(?))", + [MODEL1_PATH], + ) + a = lembed("alex garcia") + assert len(a) == (384 * 4) + assert struct.unpack("1f", a[0:4])[0] == pytest.approx( + -0.09205757826566696, rel=1e-2 + ) + + # test 3: try text path to model + db = connect(EXT_PATH) + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + + with _raises("No default model has been registered yet with lembed_models"): + lembed("alex garcia") + + db.execute( + "insert into temp.lembed_models(model) values (?)", + [MODEL1_PATH], + ) + a = lembed("alex garcia") + assert len(a) == (384 * 4) + assert struct.unpack("1f", a[0:4])[0] == pytest.approx( + -0.09205757826566696, rel=1e-2 + ) + +def test_stress_mxbai_xsmall(): + db = connect(EXT_PATH) + lembed = lambda *args: db.execute( + "select lembed({})".format(spread_args(args)), args + ).fetchone()[0] + + with _raises("No default model has been registered yet with lembed_models"): + lembed("alex garcia") + + db.execute( + "insert into temp.lembed_models(name, model) values (?, lembed_model_from_file(?))", + ["default", MODEL1_PATH], + ) + assert len(lembed("a " * 256)) == 384*4 + 
#print(db.execute('select lembed_tokenize_json(\'a a a a\') as x').fetchone()["x"]) + + # including start and end token, this is 512 tokens, max ctx size for all-mini + lembed("a " * (510)) + + with _raises("Error generating embedding: Input too long, provided 513 tokens, but model has context size of 512"): + lembed("a " * (511)) + + with _raises("Error generating embedding: Input too long, provided 4098 tokens, but model has context size of 512"): + lembed("a " * (4096)) @pytest.mark.skip(reason="TODO") def test__lembed_api(): @@ -203,11 +299,10 @@ def test_lembed_token_to_piece(): ).fetchone()[0] pass - @pytest.mark.skip(reason="TODO") -def test_lembed_chunks(): - lembed_chunks = lambda *args: db.execute( - "select * from lembed_chunks()", args +def test_lembed_batch(): + lembed_batch = lambda *args: db.execute( + "select * from lembed_batch()", args ).fetchone()[0] pass diff --git a/vendor/llama.cpp b/vendor/llama.cpp deleted file mode 160000 index 2b33896..0000000 --- a/vendor/llama.cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2b3389677a833cee0880226533a1768b1a9508d2