Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .project/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
services:
  # LLM inference NIM — serves an OpenAI-compatible API on host port 8000.
  llm-nim:
    # NOTE(review): pin an explicit version tag instead of :latest for
    # reproducible deployments.
    image: nvcr.io/nim/meta/llama-3.1-8b-instruct:latest
    environment:
      - NGC_API_KEY=${NGC_API_KEY}
      - NIM_DISABLE_CUDA_GRAPH=1
      - NIM_VLLM_FLAGS=--enforce-eager
      - NIM_MAX_MODEL_LEN=16384
    ports:
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

  # Embedding NIM — serves an OpenAI-compatible API on host port 8001.
  embed-nim:
    image: nvcr.io/nim/nvidia/llama-3.2-nemoretriever-300m-embed-v1:latest
    environment:
      - NGC_API_KEY=${NGC_API_KEY}
      - NIM_DISABLE_CUDA_GRAPH=1
    ports:
      - "8001:8000"
    # GPU reservation mirrors llm-nim's; the original omitted it for this
    # service, which would leave the embedding NIM without GPU access.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
23 changes: 21 additions & 2 deletions .project/spec.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# spec.yaml
specVersion: v2
specMinorVersion: 2
meta:
Expand Down Expand Up @@ -34,6 +35,20 @@ environment:
entrypoint_script: ""
labels:
- cuda12.2
# NOTE(review): this hunk belongs inside the `environment:` section of
# spec.yaml (see the @@ hunk header); re-indent to match that section
# when applying — the scrape stripped the original indentation.
variables:
  - name: NVIDIA_LLM_URL
    value: "http://host.docker.internal:8000/v1"
  - name: NVIDIA_EMBED_URL
    value: "http://host.docker.internal:8001/v1"
  - name: NIM_MAX_MODEL_LEN
    value: "16384"
  # NGC_API_KEY is intentionally NOT listed here: this file already
  # declares it under `secrets:` below, and the original hard-coded
  # placeholder value " " would shadow the injected secret with
  # whitespace, breaking NIM authentication.
# Points AI Workbench at the docker-compose file for the sidecar NIMs.
compose_file_path: "docker-compose.yaml"
apps:
- name: jupyterlab
type: jupyterlab
Expand Down Expand Up @@ -150,10 +165,14 @@ execution:
proxy:
trim_prefix: false
url: http://localhost:8501
# NOTE(review): the diff scrape fused the removed lines (requested: 0,
# sharedMemoryMB: 1024) with the added ones, yielding duplicate keys;
# only the new values are kept here. The dead commented-out copy of the
# old block is dropped as well.
resources:
  gpu:
    requested: 1
  sharedMemoryMB: 16384  # increased for Spark's unified memory usage
secrets:
- variable: NGC_API_KEY
description: NGC Personal Key from https://org.ngc.nvidia.com/setup/personal-keys
Expand Down
12 changes: 12 additions & 0 deletions code/chain_server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Use the environment variables we set in spec.yaml
# NOTE(review): this block was inserted ABOVE the module docstring (below)
# and ABOVE `import os` (visible further down in this file). As placed it
# raises NameError on `os` at import time — and on ChatNVIDIA /
# NVIDIAEmbeddings unless they are imported earlier than what is shown —
# and it demotes the module docstring to a plain expression statement.
# Move these two definitions below the file's import block.
llm = ChatNVIDIA(
base_url=os.getenv("NVIDIA_LLM_URL"),  # e.g. http://host.docker.internal:8000/v1 (set in variables.env)
model=os.getenv("LLM_MODEL_NAME"),  # e.g. meta/llama-3.1-8b-instruct (set in variables.env)
temperature=0.1
)

embedder = NVIDIAEmbeddings(
base_url=os.getenv("NVIDIA_EMBED_URL"),  # e.g. http://host.docker.internal:8001/v1 (set in variables.env)
model=os.getenv("EMBEDDING_MODEL_NAME")  # e.g. nvidia/llama-3.2-nemoretriever-300m-embed-v1
)

"""The definition of the NVIDIA Conversational RAG API server."""

import os
Expand Down
1,421 changes: 1,407 additions & 14 deletions code/upload-pdfs.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,3 @@ volumes:
milvus:
redis:
nim-cache:

1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ redis==5.2.1
sse-starlette==2.2.1
uvicorn==0.34.0
watchfiles==1.0.3
pydantic==2.10.6
8 changes: 8 additions & 0 deletions variables.env
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ APP_MILVUS__URL=http://milvus:19530
APP_REDIS_DSN=redis://redis:6379/0
NGC_CLI_ORG=default
NGC_HOME=~/.cache/nvidia-nims
# NOTE(review): an empty NGC_API_KEY set here may override the value the
# AI Workbench secret injects (last-writer-wins depends on load order);
# prefer leaving the key unset in this file so the secret wins — confirm.
NGC_API_KEY=""

LLM_NIM_0_MODEL=meta/llama3-8b-instruct
LLM_NIM_0_NIM_VERSION=1.0.0
Expand All @@ -24,3 +25,10 @@ NV_EMBEDQA_E5_V5_NIM_VERSION=1.0.1
NV_EMBEDQA_E5_V5_NIM_GPUS=all

SHELL=/bin/bash

# Hardcode these to ensure the Python app ignores the cloud defaults
# NOTE(review): host.docker.internal resolves out-of-the-box on Docker
# Desktop only; on a native Linux engine the consuming container needs
# `extra_hosts: ["host.docker.internal:host-gateway"]` (or the host IP).
# Confirm for the target deployment environment.
NVIDIA_LLM_URL=http://host.docker.internal:8000/v1
NVIDIA_EMBED_URL=http://host.docker.internal:8001/v1
LLM_MODEL_NAME=meta/llama-3.1-8b-instruct
EMBEDDING_MODEL_NAME=nvidia/llama-3.2-nemoretriever-300m-embed-v1