diff --git a/src/llm-examples/ollama/README.md b/src/llm-examples/ollama/README.md
new file mode 100644
index 0000000..1ddf127
--- /dev/null
+++ b/src/llm-examples/ollama/README.md
@@ -0,0 +1,71 @@
+# Ollama on HPC (Speed) Cluster
+
+Ollama is an open-source software tool that simplifies running large language models (LLMs) directly on your local machine.
+
+#### References:
+- [Ollama](https://ollama.com)
+- [Ollama GitHub](https://github.com/ollama/ollama)
+
+## Prerequisites
+Before starting, ensure you have [access](https://nag-devops.github.io/speed-hpc/#requesting-access) to the HPC (Speed) cluster.
+
+## Instructions
+* Clone the Speed GitHub repository
+  ```shell
+  git clone --depth=1 https://github.com/NAG-DevOps/speed-hpc.git
+  ```
+
+* Navigate to the ollama directory in `src/llm-examples`
+
+* Run `start_ollama.sh`
+  ```shell
+  sbatch start_ollama.sh
+  ```
+
+  The script will:
+  - Request the required resources
+  - Download the Ollama tarball and extract it
+  - Add Ollama to the user's PATH and set up environment variables
+
+    ```shell
+    setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
+    ```
+
+  - Start the Ollama server with `ollama serve`
+  - Print the ssh command to connect to the server.
+
+  Note: The server is set to run for 3 hours (adjust if needed)
+
+* Open a new terminal window and paste the ssh command to connect to the speed node where the server is running. The command will look like:
+  ```shell
+  ssh -L XXXXX:speed-XX:XXXXX <ENCS-username>@speed.encs.concordia.ca -t ssh speed-XX
+  ```
+
+* Navigate to the ollama directory and do a sanity check
+  ```shell
+  setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
+  ollama -v
+  ```
+
+* Run the `run_ollama.sh` script, replacing speed-XX with the name of the node the server is running on
+  ```shell
+  sbatch -w speed-XX run_ollama.sh
+  ```
+
+  The script will:
+  - Request the required resources
+  - Set environment variables
+  - Pull a model to run (in this case, llama3.2)
+  - Create a Python environment to run `ollama_demo.py`
+  - Run `ollama_demo.py`, which interacts with the model
+
+Optional:
+1. Check if the server is running, replacing XXXXX with the port number
+```shell
+curl http://localhost:XXXXX/api/tags
+```
+
+2. Run a model with a prompt, again replacing XXXXX with the port number
+```shell
+curl -sS http://localhost:XXXXX/api/generate -H "Content-Type: application/json" -d '{"model": "llama3.2", "prompt": "why is the sky blue?", "stream": false}' | jq -r '.response'
+```
diff --git a/src/llm-examples/ollama/ollama_demo.py b/src/llm-examples/ollama/ollama_demo.py
new file mode 100644
index 0000000..ebb984a
--- /dev/null
+++ b/src/llm-examples/ollama/ollama_demo.py
@@ -0,0 +1,23 @@
+import ollama
+import os
+from pathlib import Path
+
+user = os.getenv("USER")
+host_file = Path(f"/speed-scratch/{user}/ollama/.ollama_host")
+
+ollama_host = host_file.read_text().strip()
+
+client = ollama.Client(host=ollama_host)
+response = client.chat(
+    model='llama3.2',
+    messages=[{
+        'role': 'user',
+        'content': (
+            'What popular operating system, launched in 1991, '
+            'also has its own mascot, Tux the penguin?'
+        )
+    }]
+)
+
+print(f"[Client connected to {ollama_host}]")
+print(response["message"]["content"])
diff --git a/src/llm-examples/ollama/run_ollama.sh b/src/llm-examples/ollama/run_ollama.sh
new file mode 100755
index 0000000..5d43750
--- /dev/null
+++ b/src/llm-examples/ollama/run_ollama.sh
@@ -0,0 +1,40 @@
+#!/encs/bin/tcsh
+
+#SBATCH --job-name=ollama-client
+#SBATCH --mem=50G
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mail-type=ALL
+#SBATCH --output=ollama-%J.out
+
+set ODIR = /speed-scratch/$USER/ollama
+setenv PATH /speed-scratch/$USER/ollama/bin:$PATH
+setenv OLLAMA_MODELS $ODIR/models
+setenv OLLAMA_HOST `cat /speed-scratch/$USER/ollama/.ollama_host`
+
+# Sanity check
+ollama -v
+
+# Pull a model
+ollama pull llama3.2
+
+# Create a Python environment
+setenv ENV_DIR /speed-scratch/$USER/envs/python-env
+
+if ( ! -d $ENV_DIR ) then
+    echo "Creating python environment..."
+    mkdir -p $ENV_DIR/{tmp,pkgs,cache}
+
+    setenv TMP $ENV_DIR/tmp
+    setenv TMPDIR $ENV_DIR/tmp
+    setenv PIP_CACHE_DIR $ENV_DIR/cache
+
+    python3 -m venv $ENV_DIR
+else
+    echo "Python environment already exists."
+endif
+
+source $ENV_DIR/bin/activate.csh
+pip install -U pip ollama
+
+python ollama_demo.py
diff --git a/src/llm-examples/ollama/start_ollama.sh b/src/llm-examples/ollama/start_ollama.sh
new file mode 100755
index 0000000..a4b6f1e
--- /dev/null
+++ b/src/llm-examples/ollama/start_ollama.sh
@@ -0,0 +1,52 @@
+#!/encs/bin/tcsh
+
+#SBATCH --job-name=ollama-server
+#SBATCH --mem=50G
+#SBATCH --gpus=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mail-type=ALL
+#SBATCH --output=ollama-%J.out
+#SBATCH --time=03:00:00 ## Adjust based on your needs
+
+set ODIR = /speed-scratch/$USER/ollama
+mkdir -p $ODIR && cd $ODIR
+
+# Download the Ollama tarball and extract it once
+if ( ! -x $ODIR/bin/ollama ) then
+    echo "Downloading Ollama..."
+    curl -LO https://ollama.com/download/ollama-linux-amd64.tgz
+    tar -xzf ollama-linux-amd64.tgz
+endif
+
+# Add ollama to your PATH and set the models directory
+setenv PATH $ODIR/bin:$PATH
+setenv OLLAMA_MODELS $ODIR/models
+mkdir -p $OLLAMA_MODELS
+
+# Ollama listens on 127.0.0.1:11434 by default; pick a free port and override it via OLLAMA_HOST
+set PORT = `python -c 'import socket,sys; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()'`
+setenv OLLAMA_HOST 127.0.0.1:$PORT
+echo "http://localhost:$PORT" >! ${ODIR}/.ollama_host
+
+# Print connection instructions
+set NODE = `hostname -s`
+set USER = `whoami`
+echo ""
+echo "===================================================="
+echo " Ollama server will start on $NODE"
+echo "===================================================="
+echo "To connect from your laptop, open a new terminal and run:"
+
+echo ""
+echo " ssh -L ${PORT}:${NODE}:${PORT} ${USER}@speed.encs.concordia.ca -t ssh $NODE"
+echo ""
+echo "Once connected, set your environment variables:"
+echo " setenv PATH ${ODIR}/bin:$PATH"
+echo " setenv OLLAMA_HOST http://localhost:${PORT}"
+echo " setenv OLLAMA_MODELS ${ODIR}/models"
+echo "===================================================="
+echo ""
+
+# Start server
+srun ollama serve
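
Optionally, once the ssh tunnel printed by `start_ollama.sh` is up and the environment variables it echoes are set in that terminal, the model pulled by `run_ollama.sh` can also be exercised directly from the Ollama CLI. This is a minimal sketch, not part of the scripts above; it assumes the server job has already pulled llama3.2, and XXXXX stands for the forwarded port printed by the server job:

```shell
# Point the CLI at the tunnelled server (replace XXXXX with the printed port)
setenv OLLAMA_HOST http://localhost:XXXXX

# List the models the server has pulled, then send a one-shot prompt
ollama list
ollama run llama3.2 "Why is the sky blue?"
```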