From 64b6373053273f86cbbd2e302b5d1b12c3b4af56 Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Fri, 28 Nov 2025 10:28:57 +0200 Subject: [PATCH 1/7] initial test Signed-off-by: PatrykWo --- .cd/docker-compose.yml | 3 ++- .cd/server/server_scenarios_text.yaml | 2 +- .cd/server/settings_vllm.csv | 1 + .cd/templates/template_vllm_server.sh | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 292f031af..64830fdf1 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,7 +13,8 @@ services: env_file: - ./server/server_user.env volumes: - - /mnt/hf_cache:/mnt/hf_cache + ## - /mnt/hf_cache:/mnt/hf_cache + - /mnt/weka:/mnt/hf_cache ports: - "8000:8000" cap_add: diff --git a/.cd/server/server_scenarios_text.yaml b/.cd/server/server_scenarios_text.yaml index 1a64046ed..482196822 100644 --- a/.cd/server/server_scenarios_text.yaml +++ b/.cd/server/server_scenarios_text.yaml @@ -3,7 +3,7 @@ deepSeek-R1-Distill-Llama-70B: TENSOR_PARALLEL_SIZE: 4 llama31_8b_instruct: - MODEL: meta-llama/Llama-3.1-8B-Instruct + MODEL: Llama-3.1-8B-Instruct TENSOR_PARALLEL_SIZE: 1 llama31_70b_instruct: diff --git a/.cd/server/settings_vllm.csv b/.cd/server/settings_vllm.csv index d29161b0a..b2d930f15 100644 --- a/.cd/server/settings_vllm.csv +++ b/.cd/server/settings_vllm.csv @@ -16,3 +16,4 @@ Qwen/Qwen2.5-7B-Instruct,1,4352,128,2,15231233024,2,2,14.18519115,0,10,5,128,1,3 ibm-granite/granite-8b-code-instruct-4k,1,4096,128,2,21474836480,2,2,20,0,10,8,128,1,32,1,32,128,256,1,128,256,1,36,4096,8,32,2,32768,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 ibm-granite/granite-20b-code-instruct-8k,1,4352,128,2,40133986304,2,2,37.37,0,10,4,128,1,32,1,32,128,256,1,128,256,1,52,6144,1,48,2,65536,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 Qwen/Qwen2.5-VL-7B-Instruct,1,8448,128,2,15231233024,2,2,14.18519115,0,12,4,128,1,32,1,32,128,256,1,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,FALSE,FALSE,FALSE,1,0 +/mnt/hf_cache/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct/,1,4352,128,2,16060522496,2,2,14.95752716,0,10,9,128,1,32,1,32,128,256,1,128,256,1,32,4096,8,32,2,131072,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 diff --git a/.cd/templates/template_vllm_server.sh b/.cd/templates/template_vllm_server.sh index 6e793e819..806a2f801 100644 --- a/.cd/templates/template_vllm_server.sh +++ b/.cd/templates/template_vllm_server.sh @@ -15,7 +15,7 @@ vllm serve $MODEL \ --block-size $BLOCK_SIZE \ --dtype $DTYPE \ --tensor-parallel-size $TENSOR_PARALLEL_SIZE \ - --download_dir $HF_HOME \ + --download_dir $MODEL \ --max-model-len $MAX_MODEL_LEN \ --gpu-memory-utilization $GPU_MEM_UTILIZATION \ --max-num-seqs $MAX_NUM_SEQS \ From 2d44711cc0a45b4fb4c14124e5a2d17acb5c6e40 Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Mon, 1 Dec 2025 12:13:36 +0200 Subject: [PATCH 2/7] Add local path option for hf_cache Signed-off-by: PatrykWo --- .cd/docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 64830fdf1..5af1b9971 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,8 +13,7 @@ services: env_file: - ./server/server_user.env volumes: - ## - /mnt/hf_cache:/mnt/hf_cache - - /mnt/weka:/mnt/hf_cache + - ${LOCAL_PATH:-/tmp/hf_cache}:/mnt/hf_cache ports: - "8000:8000" cap_add: From 84b89ee17af284e78db0d508895edc7d67a5b7b9 Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Mon, 1 Dec 2025 11:21:04 +0100 Subject: [PATCH 3/7] Revert "Add local path option for hf_cache" This reverts commit 2d44711cc0a45b4fb4c14124e5a2d17acb5c6e40. --- .cd/docker-compose.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 5af1b9971..64830fdf1 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,7 +13,8 @@ services: env_file: - ./server/server_user.env volumes: - - ${LOCAL_PATH:-/tmp/hf_cache}:/mnt/hf_cache + ## - /mnt/hf_cache:/mnt/hf_cache + - /mnt/weka:/mnt/hf_cache ports: - "8000:8000" cap_add: From 628e9ccdad20d6d32e016a609fbcdb91aa93250f Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Mon, 1 Dec 2025 11:21:33 +0100 Subject: [PATCH 4/7] Revert "initial test" This reverts commit 64b6373053273f86cbbd2e302b5d1b12c3b4af56. --- .cd/docker-compose.yml | 3 +-- .cd/server/server_scenarios_text.yaml | 2 +- .cd/server/settings_vllm.csv | 1 - .cd/templates/template_vllm_server.sh | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 64830fdf1..292f031af 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,8 +13,7 @@ services: env_file: - ./server/server_user.env volumes: - ## - /mnt/hf_cache:/mnt/hf_cache - - /mnt/weka:/mnt/hf_cache + - /mnt/hf_cache:/mnt/hf_cache ports: - "8000:8000" cap_add: diff --git a/.cd/server/server_scenarios_text.yaml b/.cd/server/server_scenarios_text.yaml index 482196822..1a64046ed 100644 --- a/.cd/server/server_scenarios_text.yaml +++ b/.cd/server/server_scenarios_text.yaml @@ -3,7 +3,7 @@ deepSeek-R1-Distill-Llama-70B: TENSOR_PARALLEL_SIZE: 4 llama31_8b_instruct: - MODEL: Llama-3.1-8B-Instruct + MODEL: meta-llama/Llama-3.1-8B-Instruct TENSOR_PARALLEL_SIZE: 1 llama31_70b_instruct: diff --git a/.cd/server/settings_vllm.csv b/.cd/server/settings_vllm.csv index b2d930f15..d29161b0a 100644 --- a/.cd/server/settings_vllm.csv +++ b/.cd/server/settings_vllm.csv @@ -16,4 +16,3 @@ Qwen/Qwen2.5-7B-Instruct,1,4352,128,2,15231233024,2,2,14.18519115,0,10,5,128,1,3 ibm-granite/granite-8b-code-instruct-4k,1,4096,128,2,21474836480,2,2,20,0,10,8,128,1,32,1,32,128,256,1,128,256,1,36,4096,8,32,2,32768,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 ibm-granite/granite-20b-code-instruct-8k,1,4352,128,2,40133986304,2,2,37.37,0,10,4,128,1,32,1,32,128,256,1,128,256,1,52,6144,1,48,2,65536,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 Qwen/Qwen2.5-VL-7B-Instruct,1,8448,128,2,15231233024,2,2,14.18519115,0,12,4,128,1,32,1,32,128,256,1,128,256,1,28,3584,4,28,2,32768,1,FALSE,FALSE,2048,FALSE,FALSE,FALSE,1,0 -/mnt/hf_cache/data/pytorch/llama3.1/Meta-Llama-3.1-8B-Instruct/,1,4352,128,2,16060522496,2,2,14.95752716,0,10,9,128,1,32,1,32,128,256,1,128,256,1,32,4096,8,32,2,131072,1,FALSE,FALSE,2048,FALSE,TRUE,TRUE,1,0 diff --git a/.cd/templates/template_vllm_server.sh b/.cd/templates/template_vllm_server.sh index 806a2f801..6e793e819 100644 --- a/.cd/templates/template_vllm_server.sh +++ b/.cd/templates/template_vllm_server.sh @@ -15,7 +15,7 @@ vllm serve $MODEL \ --block-size $BLOCK_SIZE \ --dtype $DTYPE \ --tensor-parallel-size $TENSOR_PARALLEL_SIZE \ - --download_dir $MODEL \ + --download_dir $HF_HOME \ --max-model-len $MAX_MODEL_LEN \ --gpu-memory-utilization $GPU_MEM_UTILIZATION \ --max-num-seqs $MAX_NUM_SEQS \ From d41d02eabbbfce15b878c014b64283417dc77e8e Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Mon, 1 Dec 2025 12:22:53 +0200 Subject: [PATCH 5/7] Add local_path option for hf_cache Signed-off-by: PatrykWo --- .cd/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 292f031af..5af1b9971 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,7 +13,7 @@ services: env_file: - ./server/server_user.env volumes: - - /mnt/hf_cache:/mnt/hf_cache + - ${LOCAL_PATH:-/tmp/hf_cache}:/mnt/hf_cache ports: - "8000:8000" cap_add: From 36ad6f2162e4898d0e7e70795dd3a844a6b2793b Mon Sep 17 00:00:00 2001 From: PatrykWo Date: Mon, 1 Dec 2025 15:53:59 +0200 Subject: [PATCH 6/7] Fix local_path default value in docker-compose.yml Signed-off-by: PatrykWo --- .cd/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 5af1b9971..1ad26ed3e 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,7 +13,7 @@ services: env_file: - ./server/server_user.env volumes: - - ${LOCAL_PATH:-/tmp/hf_cache}:/mnt/hf_cache + - ${LOCAL_PATH:-/mnt/hf_cache}:/mnt/hf_cache ports: - "8000:8000" cap_add: From ac5cc41515dd0c3ae3aaeeb74fdfa2ab7e2fbf2d Mon Sep 17 00:00:00 2001 From: Patryk Wolsza Date: Mon, 1 Dec 2025 14:57:24 +0100 Subject: [PATCH 7/7] Update .cd/docker-compose.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Patryk Wolsza --- .cd/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cd/docker-compose.yml b/.cd/docker-compose.yml index 1ad26ed3e..9e0b7a2d6 100644 --- a/.cd/docker-compose.yml +++ b/.cd/docker-compose.yml @@ -13,6 +13,8 @@ services: env_file: - ./server/server_user.env volumes: + # LOCAL_PATH specifies the absolute path on the host to be mounted as /mnt/hf_cache inside the container. + # Expected format: absolute path. Defaults to /mnt/hf_cache if not set. - ${LOCAL_PATH:-/mnt/hf_cache}:/mnt/hf_cache ports: - "8000:8000"