
Commit e677dd1

Fix in docker compose functionality for v1-plugin (#185)
Signed-off-by: PatrykWo <[email protected]>
1 parent: b94548a · commit: e677dd1

File tree: 4 files changed, +8 −13 lines

.cd/Dockerfile.rhel.tenc.pytorch.vllm

Lines changed: 1 addition & 1 deletion
@@ -61,7 +61,7 @@ RUN mkdir -p $VLLM_PATH2 && \
 # Install additional Python packages
 RUN pip3 install datasets pandas

-# Copy utility scripts and configuration /// to be enabled later PWolsza
+# Copy utility scripts and configuration
 RUN mkdir -p /root/scripts/
 COPY templates /root/scripts/templates/
 COPY entrypoints /root/scripts/entrypoints/

.cd/Dockerfile.ubuntu.pytorch.vllm

Lines changed: 6 additions & 8 deletions
@@ -12,8 +12,8 @@ ARG REPO_TYPE=habanalabs
 FROM ${DOCKER_URL}/${VERSION}/${BASE_NAME}/${REPO_TYPE}/pytorch-installer-${PT_VERSION}:${REVISION}

 # Parameterize commit/branch for vllm-project & vllm-gaudi checkout
-ARG VLLM_GAUDI_COMMIT=v0.10.1
-ARG VLLM_PROJECT_COMMIT=v0.10.1
+ARG VLLM_GAUDI_COMMIT=v0.10.2_next
+ARG VLLM_PROJECT_COMMIT=v0.10.2

 ENV OMPI_MCA_btl_vader_single_copy_mechanism=none

@@ -38,18 +38,16 @@ RUN mkdir -p $VLLM_PATH && \
     git fetch upstream --tags || true && \
     git checkout ${VLLM_PROJECT_COMMIT} && \
     bash -c "pip install -r <(sed '/^[torch]/d' requirements/build.txt)" && \
-    VLLM_TARGET_DEVICE=empty pip install --no-build-isolation -e .
+    VLLM_TARGET_DEVICE=empty pip install --no-build-isolation .

 # Clone the vllm-gaudi repository and install inside the container

 RUN mkdir -p $VLLM_PATH2 && \
     git clone https://github.com/vllm-project/vllm-gaudi.git $VLLM_PATH2 && \
     cd $VLLM_PATH2 && \
-    git checkout ${VLLM_GAUDI_COMMIT} && \
-    VLLM_TARGET_DEVICE=hpu && pip install -v -e $VLLM_PATH2 --no-build-isolation
-
-# to be enabled later PWolsza
-# pip install -v -e $VLLM_PATH2/tests/vllm_test_utils
+    # Comment: enable if vllm-gaudi release version is used otherwise main
+    git checkout ${VLLM_GAUDI_COMMIT} && \
+    VLLM_TARGET_DEVICE=hpu && pip install -v $VLLM_PATH2 --no-build-isolation

 # Install additional Python packages
 RUN pip install datasets && \
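For reference, a minimal sketch of building this image with the commit pins introduced above. The image tag and the build context are illustrative choices, not part of this commit, and the remaining base-image ARGs (DOCKER_URL, VERSION, BASE_NAME, PT_VERSION, REVISION) still need values that resolve to a valid pytorch-installer base image:

```bash
# Sketch only: build the Ubuntu vLLM image with the checkouts pinned by this commit.
# "vllm-gaudi-ubuntu:v0.10.2" and the "." build context are arbitrary example values;
# DOCKER_URL/VERSION/BASE_NAME/PT_VERSION/REVISION must point at a reachable base image.
docker build \
  -f .cd/Dockerfile.ubuntu.pytorch.vllm \
  --build-arg VLLM_PROJECT_COMMIT=v0.10.2 \
  --build-arg VLLM_GAUDI_COMMIT=v0.10.2_next \
  -t vllm-gaudi-ubuntu:v0.10.2 \
  .
```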

.cd/server/settings_vllm.csv

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 MODEL,TENSOR_PARALLEL_SIZE,MAX_MODEL_LEN,TOTAL_GPU_MEM,UNAVAILABLE_MEM_ABS,MODEL_MEM_FROM_CONFIG,MODEL_DTYPE,QUANT_DTYPE,MODEL_MEM,PROFILER_MEM_OVERHEAD,APPROX_MEM_PER_GRAPH_MB,fsdpa,GPU_FREE_MEM_TARGET,BLOCK_SIZE,VLLM_PROMPT_BS_BUCKET_MIN,VLLM_PROMPT_BS_BUCKET_STEP,VLLM_DECODE_BS_BUCKET_MIN,VLLM_DECODE_BS_BUCKET_STEP,VLLM_PROMPT_SEQ_BUCKET_MIN,VLLM_PROMPT_SEQ_BUCKET_STEP,VLLM_DECODE_BLOCK_BUCKET_MIN,VLLM_DECODE_BLOCK_BUCKET_STEP,MAX_NUM_PREFILL_SEQS,NUM_HIDDEN_LAYERS,HIDDEN_SIZE,NUM_KEY_VALUE_HEADS,NUM_ATTENTION_HEADS,CACHE_DTYPE_BYTES,LIMIT_MODEL_LEN,PT_HPU_LAZY_MODE,VLLM_DELAYED_SAMPLING,VLLM_SKIP_WARMUP,EXPERIMENTAL_WEIGHT_SHARING,VLLM_EXPONENTIAL_BUCKETING
-meta-llama/Llama-3.1-8B-Instruct,1,4352,128,2,16060522496,2,2,14.95752716,5.5,10,1,1,128,1,32,1,32,128,256,128,256,16,32,4096,8,32,2,131072,1,TRUE,FALSE,0,FALSE
+meta-llama/Llama-3.1-8B-Instruct,1,4352,128,2,16060522496,2,2,14.95752716,5.5,10,1,1,128,1,32,1,32,128,256,128,256,16,32,4096,8,32,2,131072,1,TRUE,FALSE,0,TRUE
 meta-llama/Llama-3.1-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,10,1,1,128,1,32,1,32,128,256,128,256,16,80,8192,8,64,2,131072,1,TRUE,FALSE,0,FALSE
 meta-llama/Llama-3.3-70B-Instruct,4,4352,512,2,1.41107E+11,2,2,131.4165192,5.5,10,1,1,128,1,32,1,32,128,256,128,256,16,80,8192,8,64,2,131072,1,TRUE,FALSE,0,FALSE
 meta-llama/Llama-3.2-1B-Instruct,1,4352,128,2,2471645608,2,2,2.301899351,5.5,10,1,1,128,1,32,1,32,128,256,128,256,16,16,2048,8,32,2,131072,1,TRUE,FALSE,0,FALSE
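The only change here flips VLLM_EXPONENTIAL_BUCKETING to TRUE for Llama-3.1-8B-Instruct. How the server scripts consume this CSV is not shown in this diff, so the snippet below is only an illustrative way to inspect the flipped value; the awk lookup and the use of the last column are assumptions based on the header row above:

```bash
# Illustrative only: print the VLLM_EXPONENTIAL_BUCKETING value (last CSV column per the
# header above) for a given model from the settings file in this repository.
MODEL="meta-llama/Llama-3.1-8B-Instruct"
awk -F',' -v m="$MODEL" '$1 == m { print $NF }' .cd/server/settings_vllm.csv
# Expected output after this commit: TRUE
```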

.cd/templates/template_vllm_server.sh

Lines changed: 0 additions & 3 deletions
@@ -10,9 +10,6 @@ vllm serve $MODEL \
     --download_dir $HF_HOME \
     --max-model-len $MAX_MODEL_LEN \
     --gpu-memory-utilization $GPU_MEM_UTILIZATION \
-    --use-padding-aware-scheduling \
     --max-num-seqs $MAX_NUM_SEQS \
-    --max-num-prefill-seqs $MAX_NUM_PREFILL_SEQS \
-    --num-scheduler-steps 1 \
     --disable-log-requests \
     2>&1 | tee -a logs/vllm_server.log
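The removed flags (--use-padding-aware-scheduling, --max-num-prefill-seqs, --num-scheduler-steps) are scheduler options tied to the older V0 engine path, which the V1 plugin this commit targets presumably no longer uses. For clarity, the tail of the template after this change, reconstructed from the hunk above (lines of the script before the hunk are not part of this diff):

```bash
# Excerpt of .cd/templates/template_vllm_server.sh after this commit,
# reconstructed from the hunk; earlier lines of the script are not shown here.
vllm serve $MODEL \
    --download_dir $HF_HOME \
    --max-model-len $MAX_MODEL_LEN \
    --gpu-memory-utilization $GPU_MEM_UTILIZATION \
    --max-num-seqs $MAX_NUM_SEQS \
    --disable-log-requests \
    2>&1 | tee -a logs/vllm_server.log
```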
