Skip to content

DRAFT: Batch support #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -3,15 +3,14 @@ on:
push:
branches:
- main
- batching
permissions:
contents: read
jobs:
build-linux-x86_64-extension:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: "3.12"
@@ -25,8 +24,6 @@ jobs:
runs-on: macos-12
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- run: make loadable
- run: /usr/local/opt/python@3/libexec/bin/python -m pip install --break-system-packages pytest numpy; make test-loadable python=/usr/local/opt/python@3/libexec/bin/python
- uses: actions/upload-artifact@v4
@@ -37,8 +34,6 @@ jobs:
runs-on: macos-14
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- run: make loadable
- run: /opt/homebrew/opt/python3/libexec/bin/python -m pip install pytest numpy --break-system-packages; make test-loadable python=/opt/homebrew/opt/python3/libexec/bin/python
- uses: actions/upload-artifact@v4
@@ -49,8 +44,6 @@ jobs:
runs-on: windows-2019
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: ilammy/msvc-dev-cmd@v1
- uses: actions/setup-python@v5
with:
@@ -63,3 +56,26 @@ jobs:
with:
name: sqlite-lembed-windows-x86_64-extension
path: dist/*.dll
build-cosmopolitan:
if: false
runs-on: macos-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v4
- run: |
mkdir $HOME/cosmo
curl -L -o cosmocc-$COSMO_VERSION.zip https://github.com/jart/cosmopolitan/releases/download/$COSMO_VERSION/cosmocc-$COSMO_VERSION.zip
unzip cosmocc-$COSMO_VERSION.zip -d $HOME/cosmo
env:
COSMO_VERSION: "3.9.6"
- run: make sqlite-lembed.h
- run: make cli CC=$HOME/cosmo/bin/cosmocc CXX=$HOME/cosmo/bin/cosmoc++ AR=$HOME/cosmo/bin/cosmoar OMIT_SIMD=1
- run: tar -czvf sqlite-lembed-$(cat VERSION)-cli-cosmopolitan.tar.gz dist/sqlite3
- run: gh release upload ${{ github.ref_name }} sqlite-lembed-$(cat VERSION)-cli-cosmopolitan.tar.gz
env:
GH_TOKEN: ${{ github.token }}
- uses: actions/upload-artifact@v4
with:
name: sqlite-lembed-cosmopolitan
path: dist/*
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -10,3 +10,5 @@ ggml-metal.metal
News*.json
sqlite-lembed.h
dist/

*.db
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +0,0 @@
[submodule "vendor/llama.cpp"]
path = vendor/llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
77 changes: 56 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -4,42 +4,77 @@ project(SqliteEmbed C CXX)
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)

set(LLAMA_METAL OFF)
set(LLAMA_STATIC ON)
set(LLAMA_OPENMP OFF)

set(LLAMA_CPP_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/llama.cpp")
set(SQLITE_AMALGAMATION_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vendor/sqlite")

include(FetchContent)
include(ExternalProject)

set(SQLITE_VERSION 3450300)

# sqlite amalgamation, for up-to-date headers and sqlite3 CLI
set(SQLITE_VERSION 3470000)
set(SQLITE_YEAR 2024)
set(SQLITE_URL https://www.sqlite.org/${SQLITE_YEAR}/sqlite-amalgamation-${SQLITE_VERSION}.zip)
ExternalProject_Add(sqlite_amalgamation
URL ${SQLITE_URL}
DOWNLOAD_DIR ${CMAKE_BINARY_DIR}/downloads
SOURCE_DIR ${SQLITE_AMALGAMATION_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
)
set(SQLITE_URL)
FetchContent_Declare(
sqlite_amalgamation
URL https://www.sqlite.org/${SQLITE_YEAR}/sqlite-amalgamation-${SQLITE_VERSION}.zip
)
FetchContent_MakeAvailable(sqlite_amalgamation)


add_subdirectory(${LLAMA_CPP_DIR} ${CMAKE_BINARY_DIR}/llama.cpp)
# llama.cpp
set(LLAMA_METAL OFF)
set(LLAMA_STATIC ON)
set(LLAMA_OPENMP OFF)

include_directories(${SQLITE_AMALGAMATION_DIR})
FetchContent_Declare(
llama_cpp
GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git
GIT_TAG b3091
)
FetchContent_MakeAvailable(llama_cpp)

# sqlite-lembed loadable
add_library(sqlite_lembed SHARED sqlite-lembed.c)
add_dependencies(sqlite_lembed sqlite_amalgamation)
target_link_libraries(sqlite_lembed ggml_static llama)
target_include_directories(sqlite_lembed PRIVATE ${LLAMA_CPP_DIR})
target_include_directories(sqlite_lembed PRIVATE ${sqlite_amalgamation_SOURCE_DIR})
set_target_properties(sqlite_lembed PROPERTIES PREFIX "")
set_target_properties(sqlite_lembed PROPERTIES OUTPUT_NAME "lembed0")

# sqlite-lembed static
add_library(sqlite_lembed_static STATIC sqlite-lembed.c)
add_dependencies(sqlite_lembed_static sqlite_amalgamation)
target_link_libraries(sqlite_lembed_static ggml_static llama)
target_include_directories(sqlite_lembed_static PRIVATE ${LLAMA_CPP_DIR})
target_include_directories(sqlite_lembed_static PRIVATE ${sqlite_amalgamation_SOURCE_DIR})
target_compile_definitions(sqlite_lembed_static PRIVATE SQLITE_CORE)
set_target_properties(sqlite_lembed_static PROPERTIES OUTPUT_NAME "sqlite_lembed0")


# sqlite-vec amalgamation, compiled into the sqlite3 CLI target below.
# The release tarball is selected by SQLITE_VEC_VERSION; after
# FetchContent_MakeAvailable() its sources are under ${sqlite_vec_SOURCE_DIR}.
set(SQLITE_VEC_VERSION 0.1.6)
FetchContent_Declare(sqlite_vec
  URL "https://github.com/asg017/sqlite-vec/releases/download/v${SQLITE_VEC_VERSION}/sqlite-vec-${SQLITE_VEC_VERSION}-amalgamation.tar.gz"
)
FetchContent_MakeAvailable(sqlite_vec)


# sqlite3 CLI, with sqlite-lembed and sqlite-vec
#
# Builds the SQLite shell from the fetched amalgamation (shell.c + sqlite3.c),
# compiles sqlite-vec and core_init.c into the same binary, and links the
# static sqlite-lembed library. The resulting executable is named "sqlite3".
add_executable(
  sqlite3_cli
  "${sqlite_amalgamation_SOURCE_DIR}/shell.c"
  "${sqlite_amalgamation_SOURCE_DIR}/sqlite3.c"
  "${sqlite_vec_SOURCE_DIR}/sqlite-vec.c"
  core_init.c
)

# Linking against the target already orders the build, so a separate
# add_dependencies() call is not needed.
target_link_libraries(sqlite3_cli PRIVATE sqlite_lembed_static)

target_include_directories(
  sqlite3_cli PRIVATE
  "${sqlite_amalgamation_SOURCE_DIR}"
  "${sqlite_vec_SOURCE_DIR}"
)

# SQLITE_EXTRA_INIT=core_init points SQLite at the core_init() hook in
# core_init.c. Nothing links against an executable, so these definitions
# are PRIVATE.
target_compile_definitions(
  sqlite3_cli PRIVATE
  SQLITE_EXTRA_INIT=core_init
  SQLITE_CORE
)

set_target_properties(sqlite3_cli PROPERTIES OUTPUT_NAME "sqlite3")
29 changes: 25 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ COMMIT=$(shell git rev-parse HEAD)
VERSION=$(shell cat VERSION)
DATE=$(shell date +'%FT%TZ%z')

LLAMA_CMAKE_FLAGS=-DLLAMA_OPENMP=OFF
LLAMA_CMAKE_FLAGS+=-DLLAMA_OPENMP=OFF
ifndef CC
CC=gcc
endif
@@ -55,10 +55,11 @@ $(prefix):
TARGET_LOADABLE=$(prefix)/lembed0.$(LOADABLE_EXTENSION)
TARGET_STATIC=$(prefix)/libsqlite_lembed0.a
TARGET_STATIC_H=$(prefix)/sqlite-lembed.h
TARGET_CLI=$(prefix)/sqlite3

loadable: $(TARGET_LOADABLE)
static: $(TARGET_STATIC)

cli: $(TARGET_CLI)

BUILD_DIR=$(prefix)/.build

@@ -85,6 +86,16 @@ $(TARGET_LOADABLE): sqlite-lembed.c sqlite-lembed.h $(BUILD_DIR) $(prefix)
ls $(BUILD_DIR)
cp $(BUILT_LOADABLE_PATH) $@

# Build the static sqlite-lembed archive with CMake and copy it to
# $(TARGET_STATIC). The archive name follows the sqlite_lembed_static target's
# OUTPUT_NAME ("sqlite_lembed0"); this assumes a single-config generator that
# places it at the top of $(BUILD_DIR), as the CLI rule does for its binary.
$(TARGET_STATIC): sqlite-lembed.c sqlite-lembed.h $(BUILD_DIR) $(prefix)
	cmake --build $(BUILD_DIR) -t sqlite_lembed_static $(EXTRA_CMAKE_BUILD)
	ls $(BUILD_DIR)
	cp $(BUILD_DIR)/libsqlite_lembed0.a $@

# Build the sqlite3 CLI (CMake target sqlite3_cli) and copy the binary to
# $(TARGET_CLI). core_init.c is listed as a prerequisite because it is compiled
# into the CLI; without it, edits to that file would not trigger a rebuild.
$(TARGET_CLI): sqlite-lembed.c sqlite-lembed.h core_init.c $(BUILD_DIR) $(prefix)
	cmake --build $(BUILD_DIR) -t sqlite3_cli $(EXTRA_CMAKE_BUILD)
	ls $(BUILD_DIR)
	cp $(BUILD_DIR)/sqlite3 $@


sqlite-lembed.h: sqlite-lembed.h.tmpl VERSION
VERSION=$(shell cat VERSION) \
@@ -100,8 +111,18 @@ $(MODELS_DIR): $(BUILD_DIR)
$(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf: $(MODELS_DIR)
curl -L -o $@ https://huggingface.co/asg017/sqlite-lembed-model-examples/resolve/main/all-MiniLM-L6-v2/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf

test-loadable: $(TARGET_LOADABLE) $(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf
$(PYTHON) -m pytest tests/test-loadable.py
$(MODELS_DIR)/mxbai-embed-xsmall-v1-q8_0.gguf: $(MODELS_DIR)
curl -L -o $@ https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1/resolve/main/gguf/mxbai-embed-xsmall-v1-q8_0.gguf

$(MODELS_DIR)/nomic-embed-text-v1.5.Q2_K.gguf: $(MODELS_DIR)
curl -L -o $@ https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q2_K.gguf

models: $(MODELS_DIR)/all-MiniLM-L6-v2.e4ce9877.q8_0.gguf $(MODELS_DIR)/mxbai-embed-xsmall-v1-q8_0.gguf $(MODELS_DIR)/nomic-embed-text-v1.5.Q2_K.gguf

test-loadable: $(TARGET_LOADABLE) models
$(PYTHON) -m pytest tests/test-loadable.py -s -x -vv
test-loadable-watch:
watchexec -w sqlite-lembed.c -w tests/test-loadable.py -w Makefile --clear -- make test-loadable


FORMAT_FILES=sqlite-lembed.c
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -17,8 +17,8 @@ To load it into `sqlite-lembed`, register it with the `temp.lembed_models` table
```sql
.load ./lembed0

INSERT INTO temp.lembed_models(name, model)
select 'all-MiniLM-L6-v2', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf');
insert into temp.lembed_models(name, model)
values ('default', lembed_model_from_file('all-MiniLM-L6-v2.e4ce9877.q8_0.gguf'));

select lembed(
'all-MiniLM-L6-v2',
@@ -54,7 +54,7 @@ create virtual table vec_articles using vec0(
);

insert into vec_articles(rowid, headline_embeddings)
select rowid, lembed('all-MiniLM-L6-v2', headline)
select rowid, lembed(headline)
from articles;

```
@@ -71,7 +71,7 @@ with matches as (
rowid,
distance
from vec_articles
where headline_embeddings match lembed('all-MiniLM-L6-v2', :query)
where headline_embeddings match lembed(:query)
order by distance
limit 3
)
12 changes: 12 additions & 0 deletions core_init.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "sqlite3.h"
#include "sqlite-vec.h"
#include "sqlite-lembed.h"
#include <stdio.h>
/*
 * Initialization hook for the bundled sqlite3 CLI (the build defines
 * SQLITE_EXTRA_INIT=core_init). Registers the sqlite-vec and sqlite-lembed
 * entry points with sqlite3_auto_extension().
 *
 * dummy: unused.
 *
 * Returns SQLITE_OK when both registrations succeed; otherwise the error code
 * of the first registration that failed (sqlite-lembed is not registered if
 * sqlite-vec fails).
 */
int core_init(const char *dummy) {
  int rc;

  (void)dummy; /* parameter is part of the hook signature but not needed */

  /*
   * sqlite3_auto_extension() takes a void (*)(void). Cast function pointer to
   * function pointer directly: converting through void * is not defined by
   * ISO C and is rejected by stricter compilers.
   */
  rc = sqlite3_auto_extension((void (*)(void))sqlite3_vec_init);
  if (rc != SQLITE_OK) {
    return rc;
  }
  return sqlite3_auto_extension((void (*)(void))sqlite3_lembed_init);
}
Loading