Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
2a787c0
INTPYTHON-858 Create community search test environment
blink1073 Dec 30, 2025
f027dcf
finish setup
blink1073 Dec 30, 2025
5e47352
cleanup
blink1073 Dec 30, 2025
5e292c0
use healtcheck
blink1073 Dec 30, 2025
fc19c9d
fix healthcheck
blink1073 Dec 30, 2025
16bc993
fix healthcheck
blink1073 Dec 30, 2025
587b2d1
fix healthcheck
blink1073 Dec 30, 2025
9c041a7
fix healthcheck
blink1073 Dec 30, 2025
ffcb8c5
debug
blink1073 Dec 30, 2025
2c7afa4
fixup
blink1073 Dec 30, 2025
10a55e7
debug
blink1073 Dec 30, 2025
34116a2
handle docker internal
blink1073 Dec 30, 2025
4da3887
debug
blink1073 Dec 30, 2025
c706fd9
debug
blink1073 Dec 30, 2025
cafc148
use the service addresses
blink1073 Dec 30, 2025
1ebe3f9
Revert "use the service addresses"
blink1073 Dec 30, 2025
ebff188
use values from the tutorial
blink1073 Dec 31, 2025
6d8037c
fix is_index_ready and docker network
blink1073 Dec 31, 2025
b9ad653
add test workaround
blink1073 Dec 31, 2025
cbdaf62
fix handling of community var
blink1073 Dec 31, 2025
0b879ac
add pointer to next ticket
blink1073 Dec 31, 2025
35f8360
lint
blink1073 Dec 31, 2025
a50922c
lint
blink1073 Dec 31, 2025
d9c5cdd
remove changes to teardown
blink1073 Dec 31, 2025
f9a5a5d
add self test for community
blink1073 Dec 31, 2025
ec94665
fix self test
blink1073 Dec 31, 2025
5416ec5
fix missing var
blink1073 Dec 31, 2025
1afd11c
fix missing var
blink1073 Dec 31, 2025
2cd6109
fix var
blink1073 Dec 31, 2025
f031b4a
fix self test
blink1073 Dec 31, 2025
5c86ecb
fix self test
blink1073 Dec 31, 2025
b0a8435
fix self test
blink1073 Dec 31, 2025
9fa0c27
lint
blink1073 Dec 31, 2025
1335571
Adds embedding: isAutoEmbeddingViewWriter to see search results
caseyclements Jan 8, 2026
df3b58e
Add entrypoint:isAutoEmbeddingViewWriter = true to get status from li…
caseyclements Jan 8, 2026
5e4f036
Remove workaround to check if search index is ready INPYTHON-862
caseyclements Jan 8, 2026
3924d70
Upgraded model to voyage-3. 3.5 is no longer available.
caseyclements Jan 8, 2026
37f31f0
Removed INTPYTHON-862 workaround
caseyclements Jan 8, 2026
0f344f9
Changed env variable from COMMUNITY to COMMUNITY_WITH_SEARCH to minim…
caseyclements Jan 8, 2026
37c0b65
Cleaned up self_test. Still not robust, but a real end-to-end test.
caseyclements Jan 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 40 additions & 2 deletions .evergreen/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,19 @@ functions:
args:
- .evergreen/provision-atlas.sh

"setup community atlas":
- command: subprocess.exec
type: test
retry_on_failure: true
params:
env:
COMMUNITY_WITH_SEARCH: "1"
include_expansions_in_env: [DIR, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN]
working_dir: "src"
binary: bash
args:
- .evergreen/provision-atlas.sh

"setup remote atlas":
- command: subprocess.exec
type: test
Expand Down Expand Up @@ -140,6 +153,13 @@ tasks:
- func: "setup local atlas"
- func: "execute tests"

- name: test-langchain-python-community
tags: [local]
commands:
- func: "fetch repo"
- func: "setup community atlas"
- func: "execute tests"

- name: test-langchain-python-remote
tags: [remote]
commands:
Expand Down Expand Up @@ -332,6 +352,13 @@ tasks:
- func: "setup remote atlas"
- func: "execute tests"

- name: test-self-community
tags: [local]
commands:
- func: "setup community atlas"
- func: "execute tests"


buildvariants:
- name: test-semantic-kernel-python-rhel
display_name: Semantic-Kernel RHEL Python
Expand Down Expand Up @@ -359,14 +386,15 @@ buildvariants:
# batchtime: 10080 # 1 week

- name: test-langchain-python-rhel
display_name: Langchain RHEL Python
display_name: Langchain Ubuntu Python
tags: [python]
expansions:
DIR: langchain-python
run_on:
- rhel8.9-small
- ubuntu2204-small
tasks:
- name: test-langchain-python-local
- name: test-langchain-python-community
- name: test-langchain-python-remote
batchtime: 10080 # 1 week

Expand Down Expand Up @@ -530,3 +558,13 @@ buildvariants:
tasks:
- name: test-pymongo-search-utils-local
- name: test-pymongo-search-utils-remote

- name: test-self-ubuntu
display_name: Self Test Ubuntu
tags: [python]
expansions:
DIR: .evergreen/mongodb-community-search
run_on:
- ubuntu2204-small
tasks:
- name: test-self-community
4 changes: 3 additions & 1 deletion .evergreen/fetch-secrets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
set -eu

# Clone drivers-evergreen-tools.
git clone https://github.com/mongodb-labs/drivers-evergreen-tools
if [ ! -d drivers-evergreen-tools ]; then
git clone https://github.com/mongodb-labs/drivers-evergreen-tools
fi

# Get the secrets for drivers/ai-ml-pipeline-testing.
. drivers-evergreen-tools/.evergreen/secrets_handling/setup-secrets.sh drivers/ai-ml-pipeline-testing
2 changes: 1 addition & 1 deletion .evergreen/lint_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

CURRENT_DIR = Path(__file__).parent.resolve()
CONFIG_YML = CURRENT_DIR / "config.yml"
VALID_LANGUAGES = {"python", "golang", "javascript", "csharp"}
VALID_LANGUAGES = {"python", "golang", "javascript", "csharp", "self"}


def load_yaml_file(file_path: str) -> Dict[Any, Any]:
Expand Down
2 changes: 2 additions & 0 deletions .evergreen/mongodb-community-search/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
secrets
logs
40 changes: 40 additions & 0 deletions .evergreen/mongodb-community-search/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Run MongoDB Community Search

Scripts to run MongoDB Community Search locally using Docker Compose.

## Prerequisite

1. Follow through the steps outlined
[here](https://github.com/10gen/mongot/blob/master/docs/development/docker.md#authenticate-with-ecr)
to authenticate with ECR. We depend on an internally released image of mongot
available on an internal registry.
2. Ensure the following entry is in your `/etc/hosts` file: `127.0.0.1 host.docker.internal`

## Setup

Set required environment variables:

```bash
export VOYAGE_QUERY_API_KEY=<your-query-api-key>
export VOYAGE_INDEXING_API_KEY=<your-indexing-api-key>
```

## Run

```bash
sh ./start-services.sh
```

This will:

- Create secret files from environment variables (if not present)
- Start MongoDB and mongot containers

Note: If the `secrets` folder already exists in your repo, the script skips generating the secrets and also skips the permission changes. The files containing secrets must be read-only; otherwise `mongot` will refuse to configure a provider. Ensure that the API key files mounted into the `mongot` container in `docker-compose.yml` have permissions `400`.

## Ports

- MongoDB: 27017
- Mongot Query: 27028
- Mongot Metrics: 9946
- Mongot Health: 8080
2 changes: 2 additions & 0 deletions .evergreen/mongodb-community-search/config.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
DATABASE=test
REPO_NAME=.
56 changes: 56 additions & 0 deletions .evergreen/mongodb-community-search/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Docker-compose template taken from public facing docs at -
# https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/vector-search-quick-start/?deployment-type=self
#
# Defines two services, mongod and mongot, both attached to the
# search-community network, plus one named data volume for each.
services:
# Database server. Publishes port 27017 to the host.
mongod:
image: mongodb/mongodb-community-server:latest
command: >-
mongod
--config /etc/mongod.conf
--replSetMember=mongod.search-community:27017
ports:
- 27017:27017
extra_hosts:
- "host.docker.internal:host-gateway"
# The local mongod.conf and init-mongo.sh are mounted read-only (:ro).
volumes:
- mongod_data:/data/db
- ./mongod.conf:/etc/mongod.conf:ro
- ./init-mongo.sh:/docker-entrypoint-initdb.d/init-mongo.sh:ro
networks:
- search-community


mongot:
# The public facing docs have the publicly available mongot image but here
# we use the image published to the internal registry.

# Note that you first need to login to the registry by following the docs
# here:
# https://github.com/10gen/mongot/blob/master/docs/development/docker.md#authenticate-with-ecr
# image: mongodb/mongodb-community-search:latest
image: 901841024863.dkr.ecr.us-east-1.amazonaws.com/mongot-community/rapid-releases:latest
networks:
- search-community
# mongot.conf is mounted as the config file named in the entrypoint below;
# the password file and the two Voyage API key files are mounted read-only.
volumes:
- mongot_data:/data/mongot
- ./mongot.conf:/mongot-community/config.default.yml
- ./pwfile:/mongot-community/pwfile:ro
- ./secrets/voyage-api-query-key:/etc/voyage-api-query-key:ro
- ./secrets/voyage-api-indexing-key:/etc/voyage-api-indexing-key:ro
# NOTE(review): this short form lists mongod as a dependency but declares no
# health condition -- confirm whether mongot needs mongod to be fully ready.
depends_on:
- mongod
ports:
- 27028:27028 # Query server port from config
- 9946:9946 # Metrics port from config
- 8080:8080 # Health
# Overrides the image entrypoint to pass the mounted config file and a
# testing-only flag.
entrypoint:
- /mongot-community/mongot
- --config=/mongot-community/config.default.yml
- --internalListAllIndexesForTesting=true
volumes:
mongod_data:
mongot_data:

networks:
search-community:
name: search-community
# external: true marks the network as managed outside this file: Compose does
# not create it, so it must already exist before the services are started.
# Comment this line out if you want Compose to create the network instead.
external: true
27 changes: 27 additions & 0 deletions .evergreen/mongodb-community-search/init-mongo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Container init script: creates the MongoDB user that mongot authenticates as.
# docker-compose.yml mounts this file into the mongod container under
# /docker-entrypoint-initdb.d/.
set -e

echo "Starting MongoDB initialization..."
# Short pause before the first connection attempt
# (presumably to let mongod start accepting connections).
sleep 2

# Create user using local connection (no port specification needed)
echo "Creating user..."
# The JavaScript is passed inside a double-quoted shell string, so it must use
# single-quoted JS strings only and contain no dollar signs or backticks.
mongosh --eval "
const adminDb = db.getSiblingDB('admin');
// Error codes that mean the user is already present:
//   51003 - reported by createUser when the user exists
//   11000 - duplicate key, kept for older servers
const alreadyExistsCodes = [51003, 11000];
try {
  adminDb.createUser({
    user: 'mongotUser',
    pwd: 'mongotPassword',
    roles: [{ role: 'searchCoordinator', db: 'admin' }]
  });
  print('User mongotUser created successfully');
} catch (error) {
  if (alreadyExistsCodes.includes(error.code)) {
    print('User mongotUser already exists');
  } else {
    // Best effort: report the failure but let container startup continue.
    print('Error creating user: ' + error);
  }
}
"

echo "MongoDB initialization completed."
16 changes: 16 additions & 0 deletions .evergreen/mongodb-community-search/mongod.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# mongod.conf
# Configuration for the mongod container; docker-compose.yml mounts this file
# at /etc/mongod.conf and passes it via --config.
storage:
dbPath: /data/db

# Listen on all interfaces on port 27017.
net:
port: 27017
bindIp: 0.0.0.0

# Both search endpoints point at the mongot container's gRPC address
# (mongot.search-community:27028, the same address set in mongot.conf).
setParameter:
searchIndexManagementHostAndPort: mongot.search-community:27028
mongotHost: mongot.search-community:27028
skipAuthenticationToSearchIndexManagementServer: false
useGrpcForSearch: true

replication:
replSetName: rs0
27 changes: 27 additions & 0 deletions .evergreen/mongodb-community-search/mongot.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# mongot configuration. docker-compose.yml mounts this file into the mongot
# container as /mongot-community/config.default.yml.

# Where mongot reads data from, and the credentials it uses. The mongotUser
# account is created by init-mongo.sh; the password is read from the mounted
# pwfile.
syncSource:
replicaSet:
hostAndPort: "mongod.search-community:27017"
username: mongotUser
passwordFile: /mongot-community/pwfile
authSource: admin
tls: false
readPreference: primaryPreferred
# NOTE(review): this is a relative path, while docker-compose.yml mounts the
# mongot_data volume at /data/mongot -- confirm the container's working
# directory makes these the same location.
storage:
dataPath: "data/mongot"
# gRPC query endpoint; mongod.conf points mongotHost and
# searchIndexManagementHostAndPort at this same address.
server:
grpc:
address: "mongot.search-community:27028"
tls:
mode: "disabled"
metrics:
enabled: true
address: "mongot.search-community:9946"
healthCheck:
address: "mongot.search-community:8080"
logging:
verbosity: INFO
# Embedding provider settings. The two key files are mounted read-only from
# ./secrets by docker-compose.yml.
embedding:
queryKeyFile: /etc/voyage-api-query-key
indexingKeyFile: /etc/voyage-api-indexing-key
providerEndpoint: https://api.voyageai.com/v1/embeddings
isAutoEmbeddingViewWriter: true
21 changes: 21 additions & 0 deletions .evergreen/mongodb-community-search/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Self test for the community search environment: creates a virtualenv next to
# this script, installs pymongo into it, and runs self_test.py.
set -eu

# Every path expansion is quoted so that a checkout path containing spaces or
# glob characters is not word-split.
SCRIPT_DIR=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
# The repository root is two directory levels above this script.
ROOT_DIR=$(dirname "$(dirname "$SCRIPT_DIR")")

# env.sh and utils.sh live outside this directory; find_python3 (used below)
# is expected to come from one of them.
. "$ROOT_DIR/env.sh"
. "$ROOT_DIR/.evergreen/utils.sh"

PYTHON_BINARY=$(find_python3)

pushd "$SCRIPT_DIR"

"$PYTHON_BINARY" -m venv .venv

source .venv/bin/activate

pip install pymongo

python self_test.py
popd
86 changes: 86 additions & 0 deletions .evergreen/mongodb-community-search/self_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""End-to-end smoke test: $vectorSearch over an autoEmbed index.

Connects to the deployment named by the ``MONGODB_URI`` environment variable,
(re)creates the ``self_test.movies`` collection with an auto-embedding vector
search index on ``plot``, inserts two movies, and checks that a text query
about couples returns "Breathe" as its single result.
"""

import os
from time import monotonic, sleep

from pymongo import MongoClient
from pymongo.errors import OperationFailure
from pymongo.operations import SearchIndexModel

COLLECTION_NAME = "movies"
INDEX_NAME = "auto_embed_plot_index"
EXPECTED_TITLE = "Breathe"

# Earlier versions slept a fixed 10 seconds after creating the index and again
# after inserting. Polling with a deadline finishes as soon as the index serves
# the expected result and tolerates slower environments.
READY_TIMEOUT_SECONDS = 120
POLL_INTERVAL_SECONDS = 2

MOVIES = [
    {
        "cast": ["Cillian Murphy", "Emily Blunt", "Matt Damon"],
        "director": "Christopher Nolan",
        "genres": ["Biography", "Drama", "History"],
        "imdb": {
            "rating": 8.3,
            "votes": 680000,
        },
        "plot": "The story of American scientist J. Robert Oppenheimer and his role in the development of the atomic bomb during World War II.",
        "runtime": 180,
        "title": "Oppenheimer",
        "year": 2023,
    },
    {
        "cast": ["Andrew Garfield", "Claire Foy", "Hugh Bonneville"],
        "director": "Andy Serkis",
        "genres": ["Biography", "Drama", "Romance"],
        "imdb": {
            "rating": 7.2,
            "votes": 42000,
        },
        "plot": "The inspiring true love story of Robin and Diana Cavendish, an adventurous couple who refuse to give up in the face of a devastating disease.",
        "runtime": 118,
        "title": "Breathe",
        "year": 2017,
    },
]


def run_vector_search(movies):
    """Run the auto-embed $vectorSearch query and return the matches as a list."""
    pipeline = [
        {
            "$vectorSearch": {
                "index": INDEX_NAME,
                "path": "plot",
                "query": {"text": "movie about couples"},
                "limit": 1,
                "numCandidates": 10,
            }
        }
    ]
    return list(movies.aggregate(pipeline))


def wait_for_expected_result(movies):
    """Poll the search until it returns exactly the expected movie.

    Retries while the query fails with ``OperationFailure`` or returns anything
    other than the single expected title, since the index may not be queryable
    yet or may have embedded only part of the data.

    Returns:
        The list of result documents (length 1, title ``EXPECTED_TITLE``).

    Raises:
        AssertionError: if the expected result is not seen within
            ``READY_TIMEOUT_SECONDS``. The last server error, if any, is
            chained as the cause.
    """
    deadline = monotonic() + READY_TIMEOUT_SECONDS
    last_results = []
    last_error = None
    while True:
        try:
            last_results = run_vector_search(movies)
        except OperationFailure as exc:
            last_results = []
            last_error = exc
        else:
            last_error = None
            if [doc.get("title") for doc in last_results] == [EXPECTED_TITLE]:
                return last_results
        if monotonic() >= deadline:
            break
        sleep(POLL_INTERVAL_SECONDS)
    raise AssertionError(
        f"expected a single result titled {EXPECTED_TITLE!r} within "
        f"{READY_TIMEOUT_SECONDS}s; last results: {last_results!r}; "
        f"last error: {last_error!r}"
    ) from last_error


def main():
    """Create the collection and index, load the data, and verify the search."""
    print("Beginning simple test of vectorSearch with autoEmbed index.")

    # The context manager closes the client even when the check fails.
    with MongoClient(os.environ["MONGODB_URI"]) as client:
        db = client.self_test
        # Remove leftovers from a previous run so create_collection does not
        # raise and stale documents cannot influence the result.
        db.drop_collection(COLLECTION_NAME)
        movies = db.create_collection(COLLECTION_NAME)

        # Create auto-embed index (public preview-style syntax)
        movies.create_search_index(
            model=SearchIndexModel(
                name=INDEX_NAME,
                type="vectorSearch",
                definition={
                    "fields": [
                        {
                            "type": "autoEmbed",
                            "path": "plot",
                            "model": "voyage-4",
                            "modality": "text",
                        },
                    ],
                },
            )
        )

        movies.insert_many(MOVIES)
        search_results = wait_for_expected_result(movies)

    print(f"{len(search_results)=}")
    for doc in search_results:
        print(doc)


if __name__ == "__main__":
    main()
Loading