diff --git a/.evergreen/config.yml b/.evergreen/config.yml index 908266c..93b9ee8 100644 --- a/.evergreen/config.yml +++ b/.evergreen/config.yml @@ -74,6 +74,19 @@ functions: args: - .evergreen/provision-atlas.sh + "setup community atlas": + - command: subprocess.exec + type: test + retry_on_failure: true + params: + env: + COMMUNITY_WITH_SEARCH: "1" + include_expansions_in_env: [DIR, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN] + working_dir: "src" + binary: bash + args: + - .evergreen/provision-atlas.sh + "setup remote atlas": - command: subprocess.exec type: test @@ -140,6 +153,13 @@ tasks: - func: "setup local atlas" - func: "execute tests" + - name: test-langchain-python-community + tags: [local] + commands: + - func: "fetch repo" + - func: "setup community atlas" + - func: "execute tests" + - name: test-langchain-python-remote tags: [remote] commands: @@ -332,6 +352,13 @@ tasks: - func: "setup remote atlas" - func: "execute tests" + - name: test-self-community + tags: [local] + commands: + - func: "setup community atlas" + - func: "execute tests" + + buildvariants: - name: test-semantic-kernel-python-rhel display_name: Semantic-Kernel RHEL Python @@ -359,14 +386,15 @@ buildvariants: # batchtime: 10080 # 1 week - name: test-langchain-python-rhel - display_name: Langchain RHEL Python + display_name: Langchain Ubuntu Python tags: [python] expansions: DIR: langchain-python run_on: - - rhel8.9-small + - ubuntu2204-small tasks: - name: test-langchain-python-local + - name: test-langchain-python-community - name: test-langchain-python-remote batchtime: 10080 # 1 week @@ -530,3 +558,13 @@ buildvariants: tasks: - name: test-pymongo-search-utils-local - name: test-pymongo-search-utils-remote + + - name: test-self-ubuntu + display_name: Self Test Ubuntu + tags: [python] + expansions: + DIR: .evergreen/mongodb-community-search + run_on: + - ubuntu2204-small + tasks: + - name: test-self-community diff --git a/.evergreen/fetch-secrets.sh 
b/.evergreen/fetch-secrets.sh index 2ca2980..904c998 100644 --- a/.evergreen/fetch-secrets.sh +++ b/.evergreen/fetch-secrets.sh @@ -3,7 +3,9 @@ set -eu # Clone drivers-evergeen-tools. -git clone https://github.com/mongodb-labs/drivers-evergreen-tools +if [ ! -d drivers-evergreen-tools ]; then + git clone https://github.com/mongodb-labs/drivers-evergreen-tools +fi # Get the secrets for drivers/ai-ml-pipeline-testing. . drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing diff --git a/.evergreen/lint_config.py b/.evergreen/lint_config.py index 2d57617..fab91e2 100644 --- a/.evergreen/lint_config.py +++ b/.evergreen/lint_config.py @@ -16,7 +16,7 @@ CURRENT_DIR = Path(__file__).parent.resolve() CONFIG_YML = CURRENT_DIR / "config.yml" -VALID_LANGUAGES = {"python", "golang", "javascript", "csharp"} +VALID_LANGUAGES = {"python", "golang", "javascript", "csharp", "self"} def load_yaml_file(file_path: str) -> Dict[Any, Any]: diff --git a/.evergreen/mongodb-community-search/.gitignore b/.evergreen/mongodb-community-search/.gitignore new file mode 100644 index 0000000..d42a366 --- /dev/null +++ b/.evergreen/mongodb-community-search/.gitignore @@ -0,0 +1,2 @@ +secrets +logs diff --git a/.evergreen/mongodb-community-search/README.md b/.evergreen/mongodb-community-search/README.md new file mode 100644 index 0000000..40a0ade --- /dev/null +++ b/.evergreen/mongodb-community-search/README.md @@ -0,0 +1,40 @@ +# Run MongoDB Community Search + +Scripts to run MongoDB Community Search locally using Docker Compose. + +## Prerequisites + +1. Follow the steps outlined + [here](https://github.com/10gen/mongot/blob/master/docs/development/docker.md#authenticate-with-ecr) + to authenticate with ECR. We depend on an internally released image of mongot + available on an internal registry. +2. 
Ensure the following entry is in your `/etc/hosts` file: `127.0.0.1 host.docker.internal` + +## Setup + +Set required environment variables: + +```bash +export VOYAGE_QUERY_API_KEY= +export VOYAGE_INDEXING_API_KEY= +``` + +## Run + +```bash +bash ./start-services.sh +``` + +This will: + +- Create secret files from environment variables (if not present) +- Start MongoDB and mongot containers + +Note: If the secret files already exist under `secrets/`, the script skips generating them again and also skips the permission changes. The files containing secrets must be read-only; otherwise `mongot` will refuse to configure a provider. Ensure that the API key files mounted into the `mongot` container in `docker-compose.yml` have permissions `400`. + +## Ports + +- MongoDB: 27017 +- Mongot Query: 27028 +- Mongot Metrics: 9946 +- Mongot Health: 8080 diff --git a/.evergreen/mongodb-community-search/config.env b/.evergreen/mongodb-community-search/config.env new file mode 100644 index 0000000..2a19011 --- /dev/null +++ b/.evergreen/mongodb-community-search/config.env @@ -0,0 +1,2 @@ +DATABASE=test +REPO_NAME=. 
diff --git a/.evergreen/mongodb-community-search/docker-compose.yml b/.evergreen/mongodb-community-search/docker-compose.yml new file mode 100644 index 0000000..6f06075 --- /dev/null +++ b/.evergreen/mongodb-community-search/docker-compose.yml @@ -0,0 +1,56 @@ +# Docker-compose template taken from public facing docs at - +# https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/vector-search-quick-start/?deployment-type=self +services: + mongod: + image: mongodb/mongodb-community-server:latest + command: >- + mongod + --config /etc/mongod.conf + --replSetMember=mongod.search-community:27017 + ports: + - 27017:27017 + extra_hosts: + - "host.docker.internal:host-gateway" + volumes: + - mongod_data:/data/db + - ./mongod.conf:/etc/mongod.conf:ro + - ./init-mongo.sh:/docker-entrypoint-initdb.d/init-mongo.sh:ro + networks: + - search-community + + + mongot: + # The public facing docs have the publicly available mongot image but here + # we use the image published to the internal registry. 
+ + # Note that you first need to login to the registry by following the docs + # here: + # https://github.com/10gen/mongot/blob/master/docs/development/docker.md#authenticate-with-ecr + # image: mongodb/mongodb-community-search:latest + image: 901841024863.dkr.ecr.us-east-1.amazonaws.com/mongot-community/rapid-releases:latest + networks: + - search-community + volumes: + - mongot_data:/data/mongot + - ./mongot.conf:/mongot-community/config.default.yml + - ./pwfile:/mongot-community/pwfile:ro + - ./secrets/voyage-api-query-key:/etc/voyage-api-query-key:ro + - ./secrets/voyage-api-indexing-key:/etc/voyage-api-indexing-key:ro + depends_on: + - mongod + ports: + - 27028:27028 # Query server port from config + - 9946:9946 # Metrics port from config + - 8080:8080 # Health + entrypoint: + - /mongot-community/mongot + - --config=/mongot-community/config.default.yml + - --internalListAllIndexesForTesting=true +volumes: + mongod_data: + mongot_data: + +networks: + search-community: + name: search-community + external: true # Use an external network if it exists. Comment this line if you want to create a new network. diff --git a/.evergreen/mongodb-community-search/init-mongo.sh b/.evergreen/mongodb-community-search/init-mongo.sh new file mode 100644 index 0000000..35b6836 --- /dev/null +++ b/.evergreen/mongodb-community-search/init-mongo.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -e + +echo "Starting MongoDB initialization..." +sleep 2 + +# Create user using local connection (no port specification needed) +echo "Creating user..." +mongosh --eval " +const adminDb = db.getSiblingDB('admin'); +try { +adminDb.createUser({ + user: 'mongotUser', + pwd: 'mongotPassword', + roles: [{ role: 'searchCoordinator', db: 'admin' }] +}); +print('User mongotUser created successfully'); +} catch (error) { +if (error.code === 11000) { + print('User mongotUser already exists'); +} else { + print('Error creating user: ' + error); +} +} +" + +echo "MongoDB initialization completed." 
diff --git a/.evergreen/mongodb-community-search/mongod.conf b/.evergreen/mongodb-community-search/mongod.conf new file mode 100644 index 0000000..b6085e8 --- /dev/null +++ b/.evergreen/mongodb-community-search/mongod.conf @@ -0,0 +1,16 @@ +# mongod.conf +storage: + dbPath: /data/db + +net: + port: 27017 + bindIp: 0.0.0.0 + +setParameter: + searchIndexManagementHostAndPort: mongot.search-community:27028 + mongotHost: mongot.search-community:27028 + skipAuthenticationToSearchIndexManagementServer: false + useGrpcForSearch: true + +replication: + replSetName: rs0 diff --git a/.evergreen/mongodb-community-search/mongot.conf b/.evergreen/mongodb-community-search/mongot.conf new file mode 100644 index 0000000..fef22f5 --- /dev/null +++ b/.evergreen/mongodb-community-search/mongot.conf @@ -0,0 +1,27 @@ +syncSource: + replicaSet: + hostAndPort: "mongod.search-community:27017" + username: mongotUser + passwordFile: /mongot-community/pwfile + authSource: admin + tls: false + readPreference: primaryPreferred +storage: + dataPath: "data/mongot" +server: + grpc: + address: "mongot.search-community:27028" + tls: + mode: "disabled" +metrics: + enabled: true + address: "mongot.search-community:9946" +healthCheck: + address: "mongot.search-community:8080" +logging: + verbosity: INFO +embedding: + queryKeyFile: /etc/voyage-api-query-key + indexingKeyFile: /etc/voyage-api-indexing-key + providerEndpoint: https://api.voyageai.com/v1/embeddings + isAutoEmbeddingViewWriter: true diff --git a/.evergreen/mongodb-community-search/run.sh b/.evergreen/mongodb-community-search/run.sh new file mode 100644 index 0000000..22fe4c2 --- /dev/null +++ b/.evergreen/mongodb-community-search/run.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -eu + +SCRIPT_DIR=$(realpath "$(dirname ${BASH_SOURCE[0]})") +ROOT_DIR=$(dirname "$(dirname $SCRIPT_DIR)") + +. $ROOT_DIR/env.sh +. 
$ROOT_DIR/.evergreen/utils.sh + +PYTHON_BINARY=$(find_python3) + +pushd $SCRIPT_DIR + +$PYTHON_BINARY -m venv .venv + +source .venv/bin/activate + +pip install pymongo + +python self_test.py +popd diff --git a/.evergreen/mongodb-community-search/self_test.py b/.evergreen/mongodb-community-search/self_test.py new file mode 100644 index 0000000..a95baf8 --- /dev/null +++ b/.evergreen/mongodb-community-search/self_test.py @@ -0,0 +1,88 @@ +import os +from pymongo import MongoClient +from pymongo.operations import SearchIndexModel +from time import sleep + +print("Beginning simple test of vectorSearch with autoEmbed index.") + +# Connect and create collection +client = MongoClient(os.environ["MONGODB_URI"]) +db = client.self_test +# Drop any collection left by an earlier run so the test is repeatable: create_collection raises CollectionInvalid if "movies" already exists. +db.drop_collection("movies") +movies = db.create_collection("movies") + +# Create auto-embed index (public preview-style syntax) +movies.create_search_index( + model=SearchIndexModel( + name="auto_embed_plot_index", + type="vectorSearch", + definition={ + "fields": [ + { + "type": "autoEmbed", + "path": "plot", + "model": "voyage-4", + "modality": "text", + }, + ], + }, + ) +) +sleep(10) + +# Insert documents +movies.insert_many( + [ + { + "cast": ["Cillian Murphy", "Emily Blunt", "Matt Damon"], + "director": "Christopher Nolan", + "genres": ["Biography", "Drama", "History"], + "imdb": { + "rating": 8.3, + "votes": 680000, + }, + "plot": "The story of American scientist J. 
Robert Oppenheimer and his role in the development of the atomic bomb during World War II.", + "runtime": 180, + "title": "Oppenheimer", + "year": 2023, + }, + { + "cast": ["Andrew Garfield", "Claire Foy", "Hugh Bonneville"], + "director": "Andy Serkis", + "genres": ["Biography", "Drama", "Romance"], + "imdb": { + "rating": 7.2, + "votes": 42000, + }, + "plot": "The inspiring true love story of Robin and Diana Cavendish, an adventurous couple who refuse to give up in the face of a devastating disease.", + "runtime": 118, + "title": "Breathe", + "year": 2017, + }, + ] +) +sleep(10) + +# Run vector search aggregation using auto-embed index +search_results = list( + movies.aggregate( + [ + { + "$vectorSearch": { + "index": "auto_embed_plot_index", + "path": "plot", + "query": {"text": "movie about couples"}, + "limit": 1, + "numCandidates": 10, + } + } + ] + ) +) + +print(f"{len(search_results)=}") +assert len(search_results) == 1 +for doc in search_results: + print(doc) + assert doc["title"] == "Breathe" diff --git a/.evergreen/mongodb-community-search/start-services.sh b/.evergreen/mongodb-community-search/start-services.sh new file mode 100644 index 0000000..9cd7498 --- /dev/null +++ b/.evergreen/mongodb-community-search/start-services.sh @@ -0,0 +1,95 @@ +#!/bin/bash +set -eu +pushd "$(dirname "${BASH_SOURCE:-$0}")" > /dev/null + +# Prefer keys the caller already exported; otherwise fall back to VOYAGEAI_API_KEY from secrets-export.sh (if present). +if [ -f ../../secrets-export.sh ]; then + source ../../secrets-export.sh +fi +export VOYAGE_QUERY_API_KEY=${VOYAGE_QUERY_API_KEY:-${VOYAGEAI_API_KEY:-}} +export VOYAGE_INDEXING_API_KEY=${VOYAGE_INDEXING_API_KEY:-${VOYAGEAI_API_KEY:-}} + +grep -qxF '127.0.0.1 host.docker.internal' /etc/hosts || echo '127.0.0.1 host.docker.internal' | sudo tee -a /etc/hosts + +rm -f pwfile || true +echo -n "mongotPassword" > pwfile +chmod 400 pwfile + +# Create secrets directory if it doesn't exist +mkdir -p secrets + +if [ ! -f secrets/voyage-api-query-key ]; then + if [ -z "$VOYAGE_QUERY_API_KEY" ]; then + echo "Error: VOYAGE_QUERY_API_KEY environment variable is not set." + echo "Please set it using: export VOYAGE_QUERY_API_KEY=" + exit 1 + fi +fi + +if [ ! 
-f secrets/voyage-api-indexing-key ]; then + if [ -z "$VOYAGE_INDEXING_API_KEY" ]; then + echo "Error: VOYAGE_INDEXING_API_KEY environment variable is not set." + echo "Please set it using: export VOYAGE_INDEXING_API_KEY=" + exit 1 + fi +fi + +# Create voyage api key files from environment variables +if [ ! -f secrets/voyage-api-query-key ]; then + printf '%s' "$VOYAGE_QUERY_API_KEY" > secrets/voyage-api-query-key + chmod 400 secrets/voyage-api-query-key + echo "Created secrets/voyage-api-query-key" +else + echo "secrets/voyage-api-query-key already exists, skipping." +fi + +if [ ! -f secrets/voyage-api-indexing-key ]; then + printf '%s' "$VOYAGE_INDEXING_API_KEY" > secrets/voyage-api-indexing-key + chmod 400 secrets/voyage-api-indexing-key + echo "Created secrets/voyage-api-indexing-key" +else + echo "secrets/voyage-api-indexing-key already exists, skipping." +fi + +docker network create search-community || true +docker compose down || true +docker compose up -d + +# Wait for the healthcheck +URL="http://127.0.0.1:8080/healthcheck" +# Give up after HEALTHCHECK_TIMEOUT seconds (default 300) so a broken deployment fails instead of hanging the task. +DEADLINE=$((SECONDS + ${HEALTHCHECK_TIMEOUT:-300})) + +echo "Waiting for the server to be alive and respond with the expected status..." +set +e +while true; do + # Stop waiting once the deadline passes and surface the container logs for debugging + if [ "$SECONDS" -ge "$DEADLINE" ]; then + echo "Error: server did not report SERVING within ${HEALTHCHECK_TIMEOUT:-300} seconds." >&2 + docker compose logs + exit 1 + fi + + # Make the request and capture response with detailed debugging + RESPONSE=$(curl --max-time 10 -s "$URL") + CURL_EXIT_CODE=$? + + # Check for Curl exit code + if [ "$CURL_EXIT_CODE" -ne 0 ]; then + echo "Curl failed with exit code $CURL_EXIT_CODE, retrying in 2 seconds..." + sleep 2 + continue + fi + + # Verify the response matches the expected JSON + if [ "$RESPONSE" == '{"status":"SERVING"}' ]; then + echo "Server is now alive and responding properly!" + break + fi + + echo "Server not ready yet. Retrying in 2 seconds..." 
+ sleep 2 +done +set -e + +docker compose logs diff --git a/.evergreen/provision-atlas.sh b/.evergreen/provision-atlas.sh index 7619a2b..ad3598f 100644 --- a/.evergreen/provision-atlas.sh +++ b/.evergreen/provision-atlas.sh @@ -23,3 +23,4 @@ echo "export AZURE_OPENAI_API_KEY=$AZURE_OPENAI_API_KEY" >> env.sh echo "export OPENAI_API_VERSION=$OPENAI_API_VERSION" >> env.sh echo "export MONGODB_URI=$CONN_STRING" >> env.sh echo "export VOYAGEAI_API_KEY=$VOYAGEAI_API_KEY" >> env.sh +echo "export COMMUNITY_WITH_SEARCH=${COMMUNITY_WITH_SEARCH-}" >> env.sh diff --git a/.evergreen/utils.sh b/.evergreen/utils.sh index d72e673..99f1c83 100644 --- a/.evergreen/utils.sh +++ b/.evergreen/utils.sh @@ -60,8 +60,16 @@ setup_local_atlas() { # Ensure drivers-evergeen-tools checkout. pushd $SCRIPT_DIR/.. git clone https://github.com/mongodb-labs/drivers-evergreen-tools || true - . drivers-evergreen-tools/.evergreen/run-orchestration.sh --local-atlas -v popd + if [ -z "${COMMUNITY_WITH_SEARCH:-}" ]; then + . $SCRIPT_DIR/../drivers-evergreen-tools/.evergreen/run-orchestration.sh --local-atlas -v + else + if [ -n "${CI:-}" ]; then + bash $SCRIPT_DIR/../drivers-evergreen-tools/.evergreen/docker/setup.sh + fi + # Resolve via SCRIPT_DIR (like the calls above) so this works from any working directory. + bash $SCRIPT_DIR/mongodb-community-search/start-services.sh + fi export CONN_STRING"=mongodb://127.0.0.1:27017/?directConnection=true" echo "CONN_STRING=$CONN_STRING" > $SCRIPT_DIR/.local_atlas_uri } diff --git a/.gitignore b/.gitignore index fa20a3d..972b725 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ xunit-results/ drivers-evergreen-tools atlas .evergreen/.local_atlas_uri +pwfile # Secrets secrets-export.sh @@ -62,3 +63,4 @@ haystack-fulltext/haystack-core-integrations haystack-embeddings/haystack-core-integrations pymongo-voyageai/pymongo-voyageai llama-index-python-vectorstore/llama_index/ +langchain-python/langchain-mongodb/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a1619b9..e2076dc 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -53,12 +53,15 @@ repos: rev: "v2.2.6" hooks: - id: codespell + args: ["-L", "damon"] - repo: local hooks: - id: check-buildvariant-tags name: Check buildvariant language tags entry: python3 .evergreen/lint_config.py - language: system + language: python files: .evergreen/config.yml - args: ['--languages=python,golang,javascript,csharp'] + args: ['--languages=python,golang,javascript,csharp,self'] + additional_dependencies: + - pyyaml diff --git a/README.md b/README.md index 28cbc52..2612144 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,7 @@ bash .evergreen/execute-tests.sh ``` Use `.evergreen/setup-remote.sh` instead of `.evergreen/provision-atlas.sh` to test against the remote cluster. +Set `COMMUNITY_WITH_SEARCH=1` to test against MongoDB Community with Search edition. #### Pre-populating the Local Atlas Deployment