3 files changed: +59 −4
New GitHub Actions workflow:

+name: Publish vLLM XPU images
+
+on:
+  # NOTE(sd109): Since this is checking out an external repo,
+  # it's probably safer to leave this as workflow dispatch
+  # only so that we can manually build images from specific
+  # refs rather than automatically pulling in the latest
+  # content from the remote repo.
+  workflow_dispatch:
+    inputs:
+      vllm_ref:
+        type: string
+        description: The vLLM GitHub ref (tag, branch or commit) to build.
+        required: true
+
+jobs:
+  build_push_xpu_image:
+    name: Build and push image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write # needed for signing the images with GitHub OIDC Token
+      packages: write # required for pushing container images
+      security-events: write # required for pushing SARIF files
+    steps:
+      - name: Check out the vLLM repository
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ inputs.vllm_ref }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push image
+        run: |
+          IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
+          docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
+          docker push $IMAGE
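
Because the workflow is workflow_dispatch-only, builds have to be triggered by hand, either from the Actions tab or with the GitHub CLI. A minimal sketch (the workflow file name publish-vllm-xpu.yml is assumed for illustration and is not part of this diff):

# Build and publish an image for a specific vLLM tag
gh workflow run publish-vllm-xpu.yml -f vllm_ref=v0.8.5.post1
# The pushed image is tagged with the same ref:
#   ghcr.io/stackhpc/vllm-xpu:v0.8.5.post1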
Helm deployment template:

 spec:
   containers:
     - name: {{ .Release.Name }}-api
-      {{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
-      image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
+      {{- if eq (.Values.api.gpus | int) 0 }}
+      image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
+      {{- else if .Values.api.intelXPUsEnabled }}
+      image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
+      {{- else }}
+      image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
+      {{- end }}
       ports:
         - name: api
           containerPort: 8000
 ...
           periodSeconds: 10
       resources:
         limits:
+          {{- if .Values.api.intelXPUsEnabled }}
+          gpu.intel.com/i915: {{ .Values.api.gpus | int }}
+          {{- else }}
           nvidia.com/gpu: {{ .Values.api.gpus | int }}
+          {{- end }}
   volumes:
     - name: data
       {{- .Values.api.cacheVolume | toYaml | nindent 10 }}
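
With api.gpus set to a non-zero value and api.intelXPUsEnabled set to true, the template above renders roughly the following container spec (a sketch of the output; the release name, image version, and elided fields are example values):

spec:
  containers:
    - name: my-release-api
      image: "ghcr.io/stackhpc/vllm-xpu:v0.8.5.post1"
      ports:
        - name: api
          containerPort: 8000
      resources:
        limits:
          # Extended resource advertised by the Intel GPU device plugin
          gpu.intel.com/i915: 1

Note that gpu.intel.com/i915 is the resource name exposed by Intel's Kubernetes GPU device plugin, so cluster nodes need that plugin (and suitable hardware) before this limit can be satisfied.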
Chart values file:

   enabled: true
   # Container image config
   image:
-    # Defaults to vllm/vllm-openai when api.gpus > 0
-    # or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
+    # Defaults to vllm/vllm-openai when api.gpus > 0,
+    # ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
+    # or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
     repository:
     version: v0.8.5.post1
   monitoring:
 ...
   # distributed / multi-GPU support should be available, though it
   # has not been tested against this app.
   gpus: 1
+  # Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
+  intelXPUsEnabled: false
   # The update strategy to use for the deployment
   # See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
   # NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
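
To turn the new path on, a deployment would override these defaults. A minimal sketch of an override file (the file name is hypothetical):

# values-xpu.yaml (hypothetical override file)
api:
  gpus: 1                 # number of Intel GPUs to request
  intelXPUsEnabled: true  # request gpu.intel.com/i915 instead of nvidia.com/gpu
  image:
    # Tag must match a vllm_ref that the XPU workflow above has already built
    version: v0.8.5.post1

Passing this file to helm upgrade --install via -f values-xpu.yaml makes the template render the ghcr.io/stackhpc/vllm-xpu image and the Intel GPU resource limit.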