Commit d67b0c5

Merge pull request #78 from NAG-DevOps/ollama-example
add Ollama example
2 parents 71f1783 + e87093c

File tree

4 files changed: +186 additions, -0 deletions

src/llm-examples/ollama/README.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# Ollama on HPC (Speed) Cluster

Ollama is an open-source tool that simplifies running large language models (LLMs) directly on your local machine.

#### References:
- [Ollama](https://ollama.com)
- [Ollama GitHub](https://github.com/ollama/ollama)

## Prerequisites
Before starting, ensure you have [access](https://nag-devops.github.io/speed-hpc/#requesting-access) to the HPC (Speed) cluster.

## Instructions
* Clone the Speed GitHub repository
```shell
git clone --depth=1 https://github.com/NAG-DevOps/speed-hpc.git
```
* Navigate to the `ollama` directory in `src/llm-examples`
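For example, assuming the repository was cloned into your current directory:
```shell
cd speed-hpc/src/llm-examples/ollama
```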
* Run `start_ollama.sh`
```shell
sbatch start_ollama.sh
```

The script will:
- Request the required resources
- Download the Ollama tarball and extract it
- Add Ollama to the user's `PATH` and set up environment variables

```shell
setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
```

- Start the Ollama server with `ollama serve`
- Print the ssh command to connect to the server

Note: The server is set to run for 3 hours (adjust if needed).
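For example, to request a five-hour window instead, the time limit can be set at submission (command-line `sbatch` options override the `#SBATCH` directives in the script):
```shell
sbatch --time=05:00:00 start_ollama.sh
```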
38+
39+
* Open a new terminal window and paste the ssh command to connect to the speed node the server is running on. The command will look like:
40+
```shell
41+
ssh -L XXXXX:speed-XX:XXXXX <ENCSusername>@speed.encs.concordia.ca -t ssh speed-XX
42+
```
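As a purely hypothetical illustration, with port 45678, node speed-17, and username jdoe, the command would read:
```shell
ssh -L 45678:speed-17:45678 jdoe@speed.encs.concordia.ca -t ssh speed-17
```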
* Navigate to the ollama directory and do a sanity check
```shell
setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
ollama -v
```
* Run the `run_ollama.sh` script, replacing `speed-XX` with the name of the node the server is running on
```shell
sbatch -w speed-XX run_ollama.sh
```

The script will:
- Request the required resources
- Set environment variables
- Pull a model to run (in this case, llama3.2)
- Create a Python environment to run `ollama_demo.py`
- Run `ollama_demo.py`, which interacts with the model
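To monitor either job, the usual Slurm tooling applies; for example (replace `<jobID>` with the ID printed by `sbatch`):
```shell
squeue -u $USER
tail -f ollama-<jobID>.out
```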
Optional:
1. Check if the server is running, replacing XXXXX with the port number
```shell
curl http://localhost:XXXXX/api/tags
```

2. Run a model with a prompt
```shell
curl -sS http://localhost:XXXXX/api/generate -H "Content-Type: application/json" -d '{"model": "llama3.2","prompt": "why is the sky blue?","stream": false}' | jq -r '.response'
```
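The generate endpoint streams by default; as a sketch, dropping `"stream": false` yields one JSON object per line as tokens arrive (again, XXXXX is the port number):
```shell
curl -sN http://localhost:XXXXX/api/generate -H "Content-Type: application/json" -d '{"model": "llama3.2","prompt": "why is the sky blue?"}'
```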
src/llm-examples/ollama/ollama_demo.py

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
import ollama
import os
from pathlib import Path

# The server writes its URL to this file on startup (see start_ollama.sh)
user = os.getenv("USER")
host_file = Path(f"/speed-scratch/{user}/ollama/.ollama_host")

ollama_host = host_file.read_text().strip()

# Connect to the running Ollama server and send a single chat message
client = ollama.Client(host=ollama_host)
response = client.chat(
    model='llama3.2',
    messages=[{
        'role': 'user',
        'content': (
            'What popular operating system, launched in 1991, '
            'also has its own mascot, Tux the penguin?'
        )
    }]
)

print(f"[Client connected to {ollama_host}]")
print(response["message"]["content"])
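To experiment with the demo interactively rather than through `run_ollama.sh` (a sketch, assuming the server is up and the virtual environment from `run_ollama.sh` already exists), from a tcsh session:
```shell
source /speed-scratch/$USER/envs/python-env/bin/activate.csh
python ollama_demo.py
```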
src/llm-examples/ollama/run_ollama.sh

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
1+
#!/encs/bin/tcsh
2+
3+
#SBATCH --job-name=ollama-client
4+
#SBATCH --mem=50G
5+
#SBATCH --ntasks=1
6+
#SBATCH --cpus-per-task=4
7+
#SBATCH --mail-type=ALL
8+
#SBATCH --output=ollama-%J.out
9+
10+
set ODIR = /speed-scratch/$USER/ollama
11+
setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
12+
setenv OLLAMA_MODELS $ODIR/models
13+
setenv OLLAMA_HOST `cat /speed-scratch/$USER/ollama/.ollama_host`
14+
15+
# Sanity check
16+
ollama -v
17+
18+
# Pull a model
19+
ollama pull llama3.2
20+
21+
# Create a python environment
22+
setenv ENV_DIR /speed-scratch/$USER/envs/python-env
23+
24+
if ( ! -d $ENV_DIR ) then
25+
echo "Creating python environment..."
26+
mkdir -p $ENV_DIR/{tmp,pkgs,cache}
27+
28+
setenv TMP $ENV_DIR/tmp
29+
setenv TMPDIR $ENV_DIR/tmp
30+
setenv PIP_CACHE_DIR $ENV_DIR/cache
31+
32+
python3 -m venv $ENV_DIR
33+
else
34+
echo "Python environment already exists."
35+
endif
36+
37+
source $ENV_DIR/bin/activate.csh
38+
pip install -U pip ollama
39+
40+
python ollama_demo.py
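To try a different model, one approach (assuming the model is available in the Ollama library) is to pull it and change the `model` name in `ollama_demo.py` to match, e.g.:
```shell
ollama pull mistral
```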
src/llm-examples/ollama/start_ollama.sh

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
#!/encs/bin/tcsh

#SBATCH --job-name=ollama-server
#SBATCH --mem=50G
#SBATCH --gpus=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --mail-type=ALL
#SBATCH --output=ollama-%J.out
#SBATCH --time=03:00:00 ## Adjust based on your needs

set ODIR = /speed-scratch/$USER/ollama
mkdir -p $ODIR && cd $ODIR

# Download the Ollama tarball and extract it once
if ( ! -x $ODIR/bin/ollama ) then
    echo "Downloading Ollama..."
    curl -LO https://ollama.com/download/ollama-linux-amd64.tgz
    tar -xzf ollama-linux-amd64.tgz
endif

# Add ollama to your PATH and set the models directory
setenv PATH $ODIR/bin:$PATH
setenv OLLAMA_MODELS $ODIR/models
mkdir -p $OLLAMA_MODELS

# Ollama listens on 127.0.0.1:11434 by default; bind to a free port instead
set PORT = `python -c 'import socket,sys; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()'`
setenv OLLAMA_HOST 127.0.0.1:$PORT
echo "http://localhost:$PORT" >! ${ODIR}/.ollama_host

# Print connection instructions
set NODE = `hostname -s`
set USER = `whoami`
echo ""
echo "===================================================="
echo " Ollama server will start on $NODE"
echo "===================================================="
echo "To connect from your laptop, open a new terminal and run:"

echo ""
echo "  ssh -L ${PORT}:${NODE}:${PORT} ${USER}@speed.encs.concordia.ca -t ssh $NODE"
echo ""
echo "Once connected, set your environment variables:"
echo "  setenv PATH ${ODIR}/bin:$PATH"
echo "  setenv OLLAMA_HOST http://localhost:${PORT}"
echo "  setenv OLLAMA_MODELS ${ODIR}/models"
echo "===================================================="
echo ""

# Start the server
srun ollama serve
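Since the connection instructions are echoed to the job's output file, one way to retrieve the ssh command after submission is:
```shell
grep "ssh -L" ollama-*.out
```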
