From c626fbce93c2d2df4d230427fa9ef9fa7688a3ff Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:23:07 +0100 Subject: [PATCH 01/25] ci(workflows): bump GitHub Actions images to Ubuntu 24.04 Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 330 ++++++++++++-------- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 10 +- .github/workflows/image.yml | 12 +- 5 files changed, 208 insertions(+), 148 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 66b1c683b88f..0c63091007e5 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -52,7 +52,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -65,7 +65,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,7 +78,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -91,7 +91,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -104,7 +104,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -112,12 +112,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -125,12 +125,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 11 additional backends - build-type: 'cublas' cuda-major-version: "11" @@ -144,7 +144,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -157,7 +157,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -170,7 +170,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -183,7 +183,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" 
cuda-minor-version: "7" @@ -196,7 +196,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -218,12 +218,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -231,12 +231,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -244,12 +244,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -257,12 +257,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -270,12 +270,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -283,12 +283,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -296,12 +296,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -309,12 +309,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -322,12 +322,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 
'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -335,12 +335,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -353,7 +353,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -366,7 +366,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -379,7 +379,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -392,7 +392,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -405,7 +405,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' cuda-major-version: "13" @@ -419,7 +419,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -445,7 +445,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -471,7 +471,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -484,7 +484,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -523,7 +523,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -536,7 +536,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -549,7 +549,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -562,7 +562,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: 
"./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -575,7 +575,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -601,7 +601,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -627,7 +627,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -636,12 +636,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -649,12 +649,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -662,12 +662,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -675,12 +675,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -688,12 +688,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' cuda-major-version: "" @@ -702,12 +702,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -728,12 +728,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: 
"rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -741,12 +741,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -754,12 +754,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # sycl builds - build-type: 'intel' cuda-major-version: "" @@ -773,7 +773,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -786,7 +786,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -799,7 +799,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -812,7 +812,7 @@ jobs: backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -825,7 +825,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -838,7 +838,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -864,7 +864,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' cuda-major-version: "" @@ -878,7 +878,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -891,7 +891,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -917,7 +917,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -930,7 +930,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # piper - build-type: '' 
cuda-major-version: "" @@ -939,12 +939,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "piper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # bark-cpp - build-type: '' cuda-major-version: "" @@ -953,12 +953,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -966,12 +966,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -984,7 +984,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -992,12 +992,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # Stablediffusion-ggml - build-type: '' cuda-major-version: "" @@ -1006,12 +1006,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1024,7 +1036,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1037,7 +1049,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1050,7 +1062,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1058,12 +1070,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 
'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1076,7 +1088,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # whisper - build-type: '' cuda-major-version: "" @@ -1085,12 +1097,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1103,7 +1127,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1116,7 +1140,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1129,7 +1153,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1137,12 +1161,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1155,20 +1179,20 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisper' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' #silero-vad - build-type: '' cuda-major-version: "" @@ -1177,12 +1201,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-silero-vad' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "silero-vad" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # local-store - build-type: '' cuda-major-version: "" @@ -1191,12 +1215,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-local-store' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "local-store" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # huggingface - build-type: '' cuda-major-version: "" @@ -1205,12 +1229,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-huggingface' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "huggingface" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # rfdetr - build-type: '' cuda-major-version: "" @@ -1219,12 +1243,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1237,7 +1273,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1250,7 +1286,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1263,7 +1299,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # exllama2 - build-type: '' cuda-major-version: "" @@ -1272,12 +1308,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1290,7 +1338,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1303,7 +1351,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1311,12 +1359,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1329,7 +1377,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' # cuda-major-version: "" @@ -1337,7 +1385,7 @@ jobs: # platforms: 'linux/amd64' # tag-latest: 'auto' # tag-suffix: '-gpu-hipblas-rfdetr' - 
# base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # base-image: "rocm/dev-ubuntu-24.04:6.4.4" # runs-on: 'ubuntu-latest' # skip-drivers: 'false' # backend: "rfdetr" @@ -1351,12 +1399,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-kitten-tts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # neutts - build-type: '' cuda-major-version: "" @@ -1365,12 +1413,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1378,12 +1438,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-neutts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1391,12 +1451,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-neutts' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -1404,12 +1464,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml strategy: diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index feadf0948bdc..72a2b306741d 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -16,7 +16,7 @@ jobs: strategy: matrix: include: - - grpc-base-image: ubuntu:22.04 + - grpc-base-image: ubuntu:24.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 5c0160addb38..0dc47da211ec 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 runs-on: 'arc-runner-set' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 84ffa5a1320c..055f26036cfd 100644 --- 
a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -42,7 +42,7 @@ jobs: tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' @@ -59,8 +59,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' @@ -68,7 +68,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" @@ -78,7 +78,7 @@ jobs: tag-latest: 'false' tag-suffix: '-vulkan-core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7389760912c5..8c67434f9d1a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -40,8 +40,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" @@ -76,7 +76,7 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" @@ -101,7 +101,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" @@ -123,7 +123,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" @@ -132,7 +132,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" From 168bd8ab7ed5755f6e5b79b4fb4aaf9892c0aa6c Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:27:37 +0100 Subject: [PATCH 02/25] ci(workflows): remove CUDA 11.x support from GitHub Actions (incompatible with ubuntu:24.04) Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 171 ---------------------------------- .github/workflows/image.yml | 12 --- 2 files changed, 183 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 0c63091007e5..63b9807add60 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -40,58 +40,6 @@ jobs: matrix: include: # CUDA 11 builds - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: 
'-gpu-nvidia-cuda-11-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -130,73 +78,6 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2404' - # CUDA 11 additional backends - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -1024,19 +905,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: 
"./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1115,19 +983,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1261,19 +1116,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1326,19 +1168,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8c67434f9d1a..ab69c98ac3c4 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -82,18 +82,6 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2204' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'false' - aio: "-aio-gpu-nvidia-cuda-11" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" From 39a5690083e8401789438bb945effa137eaa7d51 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:29:46 +0100 Subject: [PATCH 03/25] ci(workflows): bump GitHub Actions CUDA support to 12.9 Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 44 +++++++++++++++---------------- .github/workflows/image-pr.yml | 2 +- .github/workflows/image.yml | 4 +-- .github/workflows/image_build.yml | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 63b9807add60..be1d8c3eb0f5 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -42,7 +42,7 @@ jobs: # CUDA 11 builds - build-type: 'l4t' 
cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' @@ -81,7 +81,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vibevoice' @@ -107,7 +107,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -120,7 +120,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -133,7 +133,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -146,7 +146,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -159,7 +159,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -172,7 +172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -185,7 +185,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -198,7 +198,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -211,7 +211,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -722,7 +722,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' @@ -855,7 +855,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -895,7 +895,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -946,7 +946,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -973,7 +973,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + 
cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -1024,7 +1024,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1106,7 +1106,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -1158,7 +1158,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -1196,7 +1196,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1250,7 +1250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -1275,7 +1275,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 055f26036cfd..2db9e5cbafa4 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -37,7 +37,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ab69c98ac3c4..ad8ce97bcd4d 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -84,7 +84,7 @@ jobs: ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -153,7 +153,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 31a1f2310ea4..39cfa1401052 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -23,7 +23,7 @@ on: type: string cuda-minor-version: description: 'CUDA minor version' - default: "4" + default: "9" type: string platforms: description: 'Platforms' From 813f0e6184d10d5541ee45e908d318044ca15da6 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 15:00:22 +0100 Subject: [PATCH 04/25] build(docker): bump base image to ubuntu:24.04 and adjust Vulkan SDK/packages Signed-off-by: Alessandro Sturniolo --- Dockerfile | 38 +++++++++++++++++++++++--------- Dockerfile.aio | 2 +- Makefile | 2 +- backend/Dockerfile.golang | 42 ++++++++++++++++++++++++------------ backend/Dockerfile.llama-cpp | 34 ++++++++++++++++++++++------- backend/Dockerfile.python | 31 ++++++++++++++++++++------ docker-compose.yaml | 2 +- 7 files changed, 110 insertions(+), 41 deletions(-) diff --git a/Dockerfile b/Dockerfile index 28147e75b856..784c575387a2 
100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg && \ + ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -34,11 +34,30 @@ RUN < /run/localai/capability @@ -141,13 +160,12 @@ ENV PATH=/opt/rocm/bin:${PATH} # The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. FROM requirements-drivers AS build-requirements -ARG GO_VERSION=1.22.6 +ARG GO_VERSION=1.25.4 ARG CMAKE_VERSION=3.31.10 ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT - RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -206,7 +224,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Dockerfile.aio b/Dockerfile.aio index 81063bb4dbeb..ccc2fc94b9ed 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} diff --git a/Makefile b/Makefile index 6df349eb66d7..57c58f307970 100644 --- a/Makefile +++ b/Makefile @@ -318,7 +318,7 @@ test-extra: prepare-test-extra DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core -BASE_IMAGE?=ubuntu:22.04 +BASE_IMAGE?=ubuntu:24.04 docker: docker build \ diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 1db39c9e1d63..3dc0d8c92988 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} AS builder ARG BACKEND=rerankers @@ -12,8 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG GO_VERSION=1.22.6 -ARG UBUNTU_VERSION=2204 +ARG GO_VERSION=1.25.4 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -40,11 +39,30 @@ RUN < Date: Wed, 3 Dec 2025 19:39:44 +0100 Subject: [PATCH 05/25] fix(backend): correct context paths for Python backends in workflows, Makefile and Dockerfile Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 144 +++++++++++++++++----------------- backend/Dockerfile.python | 6 +- 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index be1d8c3eb0f5..2fd2dd504ac8 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -51,7 +51,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -64,7 +64,7 @@ jobs: 
skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./"" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -77,7 +77,7 @@ jobs: skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -103,7 +103,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -129,7 +129,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -142,7 +142,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -155,7 +155,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -168,7 +168,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -181,7 +181,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -194,7 +194,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -207,7 +207,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -220,7 +220,7 @@ jobs: skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -229,7 +229,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -242,7 +242,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -255,11 +255,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -268,11 +268,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: 
"./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -281,11 +281,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' @@ -295,11 +295,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -321,7 +321,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -347,11 +347,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -360,11 +360,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" @@ -391,7 +391,7 @@ jobs: ubuntu-version: '2404' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -399,11 +399,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -412,11 +412,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -425,11 +425,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -438,11 +438,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -451,7 +451,7 @@ jobs: tag-latest: 
'auto' tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -477,7 +477,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -503,11 +503,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' @@ -521,7 +521,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -547,7 +547,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -560,7 +560,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -573,7 +573,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' @@ -587,7 +587,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -613,7 +613,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -626,7 +626,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -639,7 +639,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # sycl builds - build-type: 'intel' @@ -653,7 +653,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" @@ -692,7 +692,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -705,7 +705,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -718,7 +718,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -744,7 +744,7 @@ jobs: skip-drivers: 'true' backend: "kokoro" dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' @@ -758,7 +758,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -771,7 +771,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -797,7 +797,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -810,7 +810,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # piper - build-type: '' @@ -1102,7 +1102,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1115,7 +1115,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1127,7 +1127,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1140,7 +1140,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # exllama2 - build-type: '' @@ -1154,7 +1154,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1167,7 +1167,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1179,7 +1179,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -1192,7 +1192,7 @@ jobs: runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1205,7 +1205,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' @@ -1219,7 +1219,7 @@ jobs: # skip-drivers: 'false' # backend: "rfdetr" # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" + # context: "./" # kitten-tts - build-type: '' cuda-major-version: "" @@ -1232,7 +1232,7 @@ jobs: skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # neutts - build-type: '' @@ -1246,7 +1246,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: 
'2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1259,7 +1259,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1271,7 +1271,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1284,7 +1284,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index ca589e84045a..b87f7e28762e 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -174,9 +174,9 @@ RUN < Date: Wed, 3 Dec 2025 19:41:55 +0100 Subject: [PATCH 06/25] chore(make): disable parallel backend builds to avoid race conditions Signed-off-by: Alessandro Sturniolo --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 57c58f307970..b2c7fa0df0e3 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Disable parallel execution for backend builds +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin + GOCMD=go GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet From 4f02f06d0d522a8d49ab87b0d27c2877d3b74228 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:46:47 +0100 Subject: [PATCH 07/25] chore(make): export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION for override Signed-off-by: Alessandro Sturniolo --- Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index b2c7fa0df0e3..dfa7921ade90 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ UBUNTU_VERSION?=2204 GORELEASER?= export BUILD_TYPE?= +export CUDA_MAJOR_VERSION?=12 +export CUDA_MINOR_VERSION?=9 GO_TAGS?= BUILD_ID?= @@ -335,17 +337,17 @@ docker: --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ -t $(DOCKER_IMAGE) . -docker-cuda11: +docker-cuda12: docker build \ - --build-arg CUDA_MAJOR_VERSION=11 \ - --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \ + --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ - -t $(DOCKER_IMAGE)-cuda-11 . + -t $(DOCKER_IMAGE)-cuda-12 . 
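+
+# Illustrative usage sketch (version pairs mirror the CI matrix above, not an
+# exhaustive list): CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION are declared
+# earlier with `export ... ?=`, so either the environment or the make command
+# line can override the 12.9 defaults when invoking this target, e.g.
+#   make docker-cuda12 CUDA_MAJOR_VERSION=12 CUDA_MINOR_VERSION=9
+#   make docker-cuda12 CUDA_MAJOR_VERSION=13 CUDA_MINOR_VERSION=0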
docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" From 2d41ac3f559d4999c236cb1f23639bbe4c2b57c3 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:51:57 +0100 Subject: [PATCH 08/25] build(backend): update backend Dockerfiles to Ubuntu 24.04 Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.golang | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 3dc0d8c92988..c88074f3434e 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -162,6 +162,8 @@ EOT COPY . /LocalAI +RUN git config --global --add safe.directory /LocalAI + RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build FROM scratch From ccf588c6c1c2c702036540f7c2f00ee72242a079 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:55:31 +0100 Subject: [PATCH 09/25] chore(backend): add ROCm env vars and default AMDGPU_TARGETS for hipBLAS builds Signed-off-by: Alessandro Sturniolo --- backend/go/stablediffusion-ggml/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index e1bb3dea0c20..ee81fc75942b 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -28,7 +28,12 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON else ifeq ($(OS),Darwin) From 4850ea3da3e15d87c3e62add101fbfe2aa20ae8e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:56:06 +0100 Subject: [PATCH 10/25] chore(chatterbox): bump ROCm PyTorch to 2.9.1+rocm6.4 and update index URL; align hipblas requirements Signed-off-by: Alessandro Sturniolo --- backend/python/chatterbox/requirements-hipblas.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index 6c21992a7585..ed30fb824107 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.6.0+rocm6.1 -torchaudio==2.6.0+rocm6.1 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.9.1+rocm6.4 +torchaudio==2.9.1+rocm6.4 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 From 52403f772c8a0b34d0feb686bd14f021aee5d123 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 20:36:16 +0100 Subject: [PATCH 11/25] chore: add local-ai-launcher to .gitignore Signed-off-by: Alessandro Sturniolo --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index caae10a218a1..2ee2ab8588b1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ 
go-bert # LocalAI build binary LocalAI /local-ai +/local-ai-launcher # prevent above rules from omitting the helm chart !charts/* # prevent above rules from omitting the api/localai folder From 8c839543cc4db218b437a1a21af655fed79822f5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 21:27:29 +0100 Subject: [PATCH 12/25] ci(workflows): fix backends GitHub Actions workflows after rebase Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 80 +++++------------------------------ 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 2fd2dd504ac8..438908dc0414 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -39,7 +39,6 @@ jobs: #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} matrix: include: - # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "9" @@ -64,7 +63,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./"" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -78,6 +77,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./" + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -224,7 +224,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -237,7 +237,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -250,7 +250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -263,7 +263,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -276,7 +276,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -722,7 +722,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' @@ -893,18 +893,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -971,18 +959,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: 
'-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1104,18 +1080,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1156,18 +1120,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1196,7 +1148,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1248,18 +1200,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1275,7 +1215,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' From b0347d34d2a4e23bfa37ef760c5226cb5cf754cd Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:37:28 +0100 Subject: [PATCH 13/25] build(docker): use build-time UBUNTU_VERSION variable Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- backend/Dockerfile.golang | 1 + backend/Dockerfile.llama-cpp | 1 + backend/Dockerfile.python | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 784c575387a2..47c5c59c927e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} -ARG UBUNTU_VERSION=2204 +ARG UBUNTU_VERSION=2404 RUN mkdir -p /run/localai RUN echo "default" > /run/localai/capability diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index c88074f3434e..6de3f70509e9 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -13,6 +13,7 @@ ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 5800c320fbff..0ec4a8e2e26f 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -70,6 +70,7 @@ ENV 
DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index b87f7e28762e..cfc4d3b60bac 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -12,7 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG UBUNTU_VERSION=2204 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ From 6a84969cb796f04edcdd2122647c62ecd27eac65 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:38:54 +0100 Subject: [PATCH 14/25] chore(docker): remove libquadmath0 from requirements-stage base image Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 47c5c59c927e..3bba3bf34e17 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ + ffmpeg libopenblas0 libopenblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* From 72e4635763440fe456b9bd50b5fbdbdcfade2ff0 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:39:56 +0100 Subject: [PATCH 15/25] chore(make): add backends/vllm to .NOTPARALLEL to prevent parallel builds Signed-off-by: Alessandro Sturniolo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfa7921ade90..a50505c7513d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm GOCMD=go GOTEST=$(GOCMD) test From b8b4994cca6ee22490d01a238dcce8497ea27273 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:41:26 +0100 Subject: [PATCH 16/25] fix(docker): correct CUDA installation steps in backend Dockerfiles Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.golang | 8 ++++++-- backend/Dockerfile.llama-cpp | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 6de3f70509e9..4fc3e0ee680c 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -76,10 +76,14 @@ RUN < Date: Tue, 9 Dec 2025 20:42:56 +0100 Subject: [PATCH 17/25] chore(backend): update ROCm to 6.4 and align 
Python hipblas requirements Signed-off-by: Alessandro Sturniolo --- backend/python/bark/requirements-hipblas.txt | 6 +++--- backend/python/common/template/requirements-hipblas.txt | 2 +- backend/python/coqui/requirements-hipblas.txt | 6 +++--- backend/python/diffusers/requirements-hipblas.txt | 6 +++--- backend/python/faster-whisper/requirements-hipblas.txt | 2 +- backend/python/kokoro/requirements-hipblas.txt | 6 +++--- backend/python/neutts/requirements-hipblas.txt | 4 ++-- backend/python/rerankers/requirements-hipblas.txt | 4 ++-- backend/python/rfdetr/requirements-hipblas.txt | 6 +++--- backend/python/transformers/requirements-hipblas.txt | 4 ++-- backend/python/vllm/requirements-hipblas.txt | 2 +- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index 1d54fb165974..4e1fef6cfaa6 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -torchaudio==2.4.1+rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers accelerate \ No newline at end of file diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt index 76018445f448..b733ec7b148b 100644 --- a/backend/python/common/template/requirements-hipblas.txt +++ b/backend/python/common/template/requirements-hipblas.txt @@ -1,2 +1,2 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 55cdcdddb845..8e7d034591e3 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -torchaudio==2.4.1+rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index aeea375639a0..b1f8b3e048c5 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 git+https://github.com/huggingface/diffusers opencv-python transformers diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt index 29413f0508b3..da9c9123c0d7 100644 --- a/backend/python/faster-whisper/requirements-hipblas.txt +++ b/backend/python/faster-whisper/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 torch faster-whisper \ No newline at end of file diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt index 1226d917447b..74262df5c3ce 100644 --- 
a/backend/python/kokoro/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchaudio==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers accelerate kokoro diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt index 012d3c8bf6f5..72d11e059817 100644 --- a/backend/python/neutts/requirements-hipblas.txt +++ b/backend/python/neutts/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.8.0+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 transformers==4.56.1 accelerate librosa==0.11.0 diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index b1c8baeddfe9..7a72b3d0650f 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 transformers accelerate -torch==2.4.1+rocm6.0 +torch==2.8.0+rocm6.4 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt index 536a31efb509..884cfba7be46 100644 --- a/backend/python/rfdetr/requirements-hipblas.txt +++ b/backend/python/rfdetr/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 rfdetr opencv-python accelerate diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 732a3adfcdc4..59f99e3643fa 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 accelerate transformers llvmlite==0.43.0 diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index 3a65e0d7c907..db732bc864ef 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.3 +--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 accelerate torch transformers From 05c983624e09e8aa08cf1b6e9c1449bb6c9ba7d5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Fri, 12 Dec 2025 13:02:33 +0100 Subject: [PATCH 18/25] ci(workflows): switch GitHub Actions runners to Ubuntu-24.04 for CUDA on arm64 builds Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 14 +++++++------- .github/workflows/image.yml | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 438908dc0414..42fc4c9fef2a 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -46,7 +46,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' runs-on: 'ubuntu-24.04-arm' - 
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" @@ -740,7 +740,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" @@ -860,7 +860,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -939,7 +939,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -1005,7 +1005,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-whisper' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -1100,7 +1100,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-rfdetr' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" @@ -1153,7 +1153,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-chatterbox' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ad8ce97bcd4d..a97ac420e022 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -157,11 +157,11 @@ jobs: platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' - ubuntu-version: "2204" + ubuntu-version: "2404" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" From 5254fdd91b94f1ef58d335c688a39d4540b7ed8e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Fri, 12 Dec 2025 13:09:16 +0100 Subject: [PATCH 19/25] build(docker): update base image and backend Dockerfiles for Ubuntu 24.04 compatibility on arm64 Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- backend/Dockerfile.golang | 8 ++------ backend/Dockerfile.llama-cpp | 8 ++------ backend/Dockerfile.python | 8 ++------ 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3bba3bf34e17..fb35a9dfb0b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -90,7 +90,7 @@ RUN < Date: Fri, 12 Dec 2025 13:10:29 +0100 Subject: [PATCH 20/25] build(backend): increase timeout for uv installs behind slow networks on backend/Dockerfile.python Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.python | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index a6e59e4def92..1a1c43b1822f 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -157,7 +157,8 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; 
then \ # Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh ENV PATH="/root/.cargo/bin:${PATH}" - +# Increase timeout for uv installs behind slow networks +ENV UV_HTTP_TIMEOUT=180 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y # Install grpcio-tools (the version in 22.04 is too old) From cf4c4885a9f5c69dc66ec054eef914dbb0926480 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Sat, 13 Dec 2025 18:22:47 +0100 Subject: [PATCH 21/25] ci(workflows): switch GitHub Actions runners to Ubuntu-24.04 for vibevoice backend Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 42fc4c9fef2a..a4f08932b5ea 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -86,12 +86,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -308,12 +308,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -378,7 +378,7 @@ jobs: ubuntu-version: '2404' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -596,12 +596,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vibevoice' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -727,12 +727,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -784,8 +784,8 @@ jobs: skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" From 544c51aa9bf0b2321dc2a5650cb4a98fdc0cd76b Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 16 Dec 2025 15:42:46 +0100 Subject: [PATCH 22/25] ci(workflows): fix failing GitHub Actions runners Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/backend.yml 
b/.github/workflows/backend.yml index a4f08932b5ea..9c24ca46fb95 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -857,7 +857,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' base-image: "ubuntu:24.04" @@ -936,7 +936,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' base-image: "ubuntu:24.04" @@ -1002,7 +1002,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-whisper' base-image: "ubuntu:24.04" @@ -1237,7 +1237,7 @@ jobs: skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml From 0710001db1d4b8e8d62e10e8eab3f0f3cbdf17d7 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Mon, 29 Dec 2025 11:42:19 +0000 Subject: [PATCH 23/25] fix: Allow FROM_SOURCE to be unset, use upstream Intel images etc. Signed-off-by: Richard Palethorpe --- .github/workflows/backend.yml | 34 ++++++++++----------- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 14 ++++----- .github/workflows/image.yml | 16 +++++----- Makefile | 2 +- backend/python/vllm/install.sh | 2 +- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 9c24ca46fb95..8b6c38b9c7ca 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -649,7 +649,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-rerankers' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" @@ -662,7 +662,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -675,7 +675,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -688,7 +688,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-vllm' runs-on: 'arc-runner-set' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" @@ -701,7 +701,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-transformers' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" @@ -714,7 +714,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-diffusers' runs-on: 
'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" @@ -754,7 +754,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-kokoro' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" @@ -767,7 +767,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" @@ -780,7 +780,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-vibevoice' runs-on: 'arc-runner-set' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" @@ -793,7 +793,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-coqui' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" @@ -806,7 +806,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-bark' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" @@ -900,7 +900,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -913,7 +913,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -966,7 +966,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -979,7 +979,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -1087,7 +1087,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-rfdetr' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" @@ -1127,7 +1127,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-exllama2' runs-on: 'ubuntu-latest' - base-image: 
"quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 0dc47da211ec..c417ceeb8dbd 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -53,7 +53,7 @@ jobs: BASE_IMAGE=${{ matrix.base-image }} context: . file: ./Dockerfile - tags: quay.io/go-skynet/intel-oneapi-base:latest + tags: quay.io/go-skynet/intel-oneapi-base:24.04 push: true target: intel platforms: ${{ matrix.platforms }} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 2db9e5cbafa4..9b7b9ec97b0b 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -44,7 +44,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -54,7 +54,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -63,16 +63,16 @@ jobs: grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'false' @@ -80,7 +80,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -91,4 +91,4 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2404' \ No newline at end of file + ubuntu-version: '2404' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index a97ac420e022..7f2a316634c2 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -45,7 +45,7 @@ jobs: runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" - ubuntu-version: '2204' + ubuntu-version: '2404' core-image-build: uses: ./.github/workflows/image_build.yml @@ -81,7 +81,7 @@ jobs: aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "9" @@ -93,7 +93,7 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -105,7 +105,7 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-13" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -115,17 +115,17 @@ jobs: skip-drivers: 
'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel" - ubuntu-version: '2204' + ubuntu-version: '2404' gh-runner: uses: ./.github/workflows/image_build.yml @@ -172,4 +172,4 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2404' \ No newline at end of file + ubuntu-version: '2404' diff --git a/Makefile b/Makefile index a50505c7513d..040741f4c1f8 100644 --- a/Makefile +++ b/Makefile @@ -365,7 +365,7 @@ docker-aio-all: docker-image-intel: docker build \ - --build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index 364ff7a41b99..7dcd29db4a92 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -28,7 +28,7 @@ fi # We don't embed this into the images as it is a large dependency and not always needed. # Besides, the speed inference are not actually usable in the current state for production use-cases. -if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then +if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then ensureVenv # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html if [ ! 
-d vllm ]; then From 4198530a1f44f463b653760ebe9e812a17c3fd99 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Mon, 5 Jan 2026 15:06:53 +0000 Subject: [PATCH 24/25] chore(build): rm all traces of CUDA 11 Signed-off-by: Richard Palethorpe --- README.md | 17 +-- backend/README.md | 7 +- backend/index.yaml | 115 ------------------ backend/python/README.md | 4 +- backend/python/bark/requirements-cublas11.txt | 5 - .../chatterbox/requirements-cublas11.txt | 8 -- backend/python/common/libbackend.sh | 6 +- .../python/coqui/requirements-cublas11.txt | 6 - .../diffusers/requirements-cublas11.txt | 12 -- .../python/exllama2/requirements-cublas11.txt | 4 - .../faster-whisper/requirements-cublas11.txt | 9 -- .../python/kokoro/requirements-cublas11.txt | 7 -- .../rerankers/requirements-cublas11.txt | 5 - .../python/rfdetr/requirements-cublas11.txt | 8 -- .../transformers/requirements-cublas11.txt | 10 -- .../vibevoice/requirements-cublas11.txt | 22 ---- .../vllm/requirements-cublas11-after.txt | 1 - backend/python/vllm/requirements-cublas11.txt | 5 - .../getting-started/container-images.md | 15 +-- docs/content/installation/docker.md | 10 -- docs/content/reference/compatibility-table.md | 12 +- 21 files changed, 20 insertions(+), 268 deletions(-) delete mode 100644 backend/python/bark/requirements-cublas11.txt delete mode 100644 backend/python/chatterbox/requirements-cublas11.txt delete mode 100644 backend/python/coqui/requirements-cublas11.txt delete mode 100644 backend/python/diffusers/requirements-cublas11.txt delete mode 100644 backend/python/exllama2/requirements-cublas11.txt delete mode 100644 backend/python/faster-whisper/requirements-cublas11.txt delete mode 100644 backend/python/kokoro/requirements-cublas11.txt delete mode 100644 backend/python/rerankers/requirements-cublas11.txt delete mode 100644 backend/python/rfdetr/requirements-cublas11.txt delete mode 100644 backend/python/transformers/requirements-cublas11.txt delete mode 100644 backend/python/vibevoice/requirements-cublas11.txt delete mode 100644 backend/python/vllm/requirements-cublas11-after.txt delete mode 100644 backend/python/vllm/requirements-cublas11.txt diff --git a/README.md b/README.md index b3993940ed1e..092432241837 100644 --- a/README.md +++ b/README.md @@ -152,9 +152,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp # CUDA 12.0 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 -# CUDA 11.7 -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 - # NVIDIA Jetson (L4T) ARM64 # CUDA 12 (for Nvidia AGX Orin and similar platforms) docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64 @@ -193,9 +190,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai # NVIDIA CUDA 12 version docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# NVIDIA CUDA 11 version -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 - # Intel GPU version docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel @@ -279,9 +273,9 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration ### Text Generation & Language Models | Backend | Description | Acceleration Support | |---------|-------------|---------------------| -| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, 
CPU | +| **llama.cpp** | LLM inference in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | | **vLLM** | Fast LLM inference with PagedAttention | CUDA 12/13, ROCm, Intel | -| **transformers** | HuggingFace transformers framework | CUDA 11/12/13, ROCm, Intel, CPU | +| **transformers** | HuggingFace transformers framework | CUDA 12/13, ROCm, Intel, CPU | | **exllama2** | GPTQ inference library | CUDA 12/13 | | **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) | | **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) | @@ -295,7 +289,7 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU | | **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU | | **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU | -| **chatterbox** | Production-grade TTS | CUDA 11/12/13, CPU | +| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU | | **piper** | Fast neural TTS system | CPU | | **kitten-tts** | Kitten TTS models | CPU | | **silero-vad** | Voice Activity Detection | CPU | @@ -306,13 +300,13 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | Backend | Description | Acceleration Support | |---------|-------------|---------------------| | **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12/13, Intel SYCL, Vulkan, CPU | -| **diffusers** | HuggingFace diffusion models | CUDA 11/12/13, ROCm, Intel, Metal, CPU | +| **diffusers** | HuggingFace diffusion models | CUDA 12/13, ROCm, Intel, Metal, CPU | ### Specialized AI Tasks | Backend | Description | Acceleration Support | |---------|-------------|---------------------| | **rfdetr** | Real-time object detection | CUDA 12/13, Intel, CPU | -| **rerankers** | Document reranking API | CUDA 11/12/13, ROCm, Intel, CPU | +| **rerankers** | Document reranking API | CUDA 12/13, ROCm, Intel, CPU | | **local-store** | Vector database | CPU | | **huggingface** | HuggingFace API integration | API-based | @@ -320,7 +314,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | Acceleration Type | Supported Backends | Hardware Support | |-------------------|-------------------|------------------| -| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware | | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware | | **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware | | **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice | AMD Graphics | diff --git a/backend/README.md b/backend/README.md index 87fd9f28f89c..3f3076c046f1 100644 --- a/backend/README.md +++ b/backend/README.md @@ -65,7 +65,7 @@ The backend system provides language-specific Dockerfiles that handle the build ## Hardware Acceleration Support ### CUDA (NVIDIA) -- **Versions**: CUDA 11.x, 12.x +- **Versions**: CUDA 12.x, 13.x - **Features**: cuBLAS, cuDNN, TensorRT optimization - **Targets**: x86_64, ARM64 (Jetson) @@ -132,8 +132,7 @@ For ARM64/Mac builds, docker can't be used, and the makefile in the respective b ### Build Types - **`cpu`**: CPU-only optimization -- **`cublas11`**: CUDA 11.x with cuBLAS -- **`cublas12`**: CUDA 12.x with cuBLAS +- **`cublas12`**, **`cublas13`**: CUDA 12.x, 13.x with cuBLAS - **`hipblas`**: ROCm with rocBLAS - **`intel`**: Intel oneAPI optimization - **`vulkan`**: Vulkan-based acceleration @@ 
-210,4 +209,4 @@ When contributing to the backend system: 2. **Add Tests**: Include comprehensive test coverage 3. **Document**: Provide clear usage examples 4. **Optimize**: Consider performance and resource usage -5. **Validate**: Test across different hardware targets \ No newline at end of file +5. **Validate**: Test across different hardware targets diff --git a/backend/index.yaml b/backend/index.yaml index a383a15fdf3d..1f8c1f7fb4df 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -634,11 +634,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp" mirrors: - localai/localai-backends:master-cpu-llama-cpp -- !!merge <<: *llamacpp - name: "cuda11-llama-cpp" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-llama-cpp - !!merge <<: *llamacpp name: "cuda12-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp" @@ -679,11 +674,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp" mirrors: - localai/localai-backends:master-metal-darwin-arm64-llama-cpp -- !!merge <<: *llamacpp - name: "cuda11-llama-cpp-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-llama-cpp - !!merge <<: *llamacpp name: "cuda12-llama-cpp-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp" @@ -755,11 +745,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper" mirrors: - localai/localai-backends:master-cpu-whisper -- !!merge <<: *whispercpp - name: "cuda11-whisper" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-whisper - !!merge <<: *whispercpp name: "cuda12-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper" @@ -800,11 +785,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper" mirrors: - localai/localai-backends:master-metal-darwin-arm64-whisper -- !!merge <<: *whispercpp - name: "cuda11-whisper-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-whisper - !!merge <<: *whispercpp name: "cuda12-whisper-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper" @@ -879,11 +859,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml" mirrors: - localai/localai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml -- !!merge <<: *stablediffusionggml - name: "cuda11-stablediffusion-ggml" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "cuda12-stablediffusion-ggml-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml" @@ -899,11 +874,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml -- !!merge <<: *stablediffusionggml - name: "cuda11-stablediffusion-ggml-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml" - mirrors: - - 
localai/localai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "nvidia-l4t-arm64-stablediffusion-ggml-development" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml" @@ -1054,11 +1024,6 @@ intel: "intel-rerankers-development" amd: "rocm-rerankers-development" nvidia-cuda-13: "cuda13-rerankers-development" -- !!merge <<: *rerankers - name: "cuda11-rerankers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-rerankers - !!merge <<: *rerankers name: "cuda12-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers" @@ -1074,11 +1039,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-rerankers -- !!merge <<: *rerankers - name: "cuda11-rerankers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-rerankers - !!merge <<: *rerankers name: "cuda12-rerankers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers" @@ -1127,16 +1087,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-transformers" mirrors: - localai/localai-backends:latest-gpu-intel-transformers -- !!merge <<: *transformers - name: "cuda11-transformers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-transformers -- !!merge <<: *transformers - name: "cuda11-transformers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-transformers - !!merge <<: *transformers name: "cuda12-transformers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers" @@ -1213,21 +1163,11 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-diffusers -- !!merge <<: *diffusers - name: "cuda11-diffusers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-diffusers - !!merge <<: *diffusers name: "intel-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-diffusers" mirrors: - localai/localai-backends:latest-gpu-intel-diffusers -- !!merge <<: *diffusers - name: "cuda11-diffusers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-diffusers - !!merge <<: *diffusers name: "cuda12-diffusers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers" @@ -1269,21 +1209,11 @@ capabilities: nvidia: "cuda12-exllama2-development" intel: "intel-exllama2-development" -- !!merge <<: *exllama2 - name: "cuda11-exllama2" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-exllama2 - !!merge <<: *exllama2 name: "cuda12-exllama2" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-exllama2 -- !!merge <<: *exllama2 - name: "cuda11-exllama2-development" - uri: 
"quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-exllama2 - !!merge <<: *exllama2 name: "cuda12-exllama2-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2" @@ -1297,11 +1227,6 @@ intel: "intel-kokoro-development" amd: "rocm-kokoro-development" nvidia-l4t: "nvidia-l4t-kokoro-development" -- !!merge <<: *kokoro - name: "cuda11-kokoro-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-kokoro - !!merge <<: *kokoro name: "cuda12-kokoro-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro" @@ -1332,11 +1257,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-kokoro" mirrors: - localai/localai-backends:master-nvidia-l4t-kokoro -- !!merge <<: *kokoro - name: "cuda11-kokoro" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-kokoro - !!merge <<: *kokoro name: "cuda12-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro" @@ -1365,11 +1285,6 @@ intel: "intel-faster-whisper-development" amd: "rocm-faster-whisper-development" nvidia-cuda-13: "cuda13-faster-whisper-development" -- !!merge <<: *faster-whisper - name: "cuda11-faster-whisper" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-faster-whisper - !!merge <<: *faster-whisper name: "cuda12-faster-whisper-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper" @@ -1408,21 +1323,11 @@ nvidia: "cuda12-coqui-development" intel: "intel-coqui-development" amd: "rocm-coqui-development" -- !!merge <<: *coqui - name: "cuda11-coqui" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-coqui - !!merge <<: *coqui name: "cuda12-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-coqui -- !!merge <<: *coqui - name: "cuda11-coqui-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-coqui - !!merge <<: *coqui name: "cuda12-coqui-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui" @@ -1455,16 +1360,6 @@ nvidia: "cuda12-bark-development" intel: "intel-bark-development" amd: "rocm-bark-development" -- !!merge <<: *bark - name: "cuda11-bark-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-bark -- !!merge <<: *bark - name: "cuda11-bark" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-bark - !!merge <<: *bark name: "rocm-bark-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark" @@ -1546,16 +1441,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox" mirrors: - localai/localai-backends:master-gpu-nvidia-cuda-12-chatterbox -- !!merge <<: *chatterbox - name: "cuda11-chatterbox" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox" - 
mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-chatterbox -- !!merge <<: *chatterbox - name: "cuda11-chatterbox-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-chatterbox - !!merge <<: *chatterbox name: "cuda12-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox" diff --git a/backend/python/README.md b/backend/python/README.md index 8c76593467f8..9f894b77b596 100644 --- a/backend/python/README.md +++ b/backend/python/README.md @@ -85,7 +85,7 @@ runUnittests The build system automatically detects and configures for different hardware: - **CPU** - Standard CPU-only builds -- **CUDA** - NVIDIA GPU acceleration (supports CUDA 11/12) +- **CUDA** - NVIDIA GPU acceleration (supports CUDA 12/13) - **Intel** - Intel XPU/GPU optimization - **MLX** - Apple Silicon (M1/M2/M3) optimization - **HIP** - AMD GPU acceleration @@ -95,8 +95,8 @@ The build system automatically detects and configures for different hardware: Backends can specify hardware-specific dependencies: - `requirements.txt` - Base requirements - `requirements-cpu.txt` - CPU-specific packages -- `requirements-cublas11.txt` - CUDA 11 packages - `requirements-cublas12.txt` - CUDA 12 packages +- `requirements-cublas13.txt` - CUDA 13 packages - `requirements-intel.txt` - Intel-optimized packages - `requirements-mps.txt` - Apple Silicon packages diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt deleted file mode 100644 index 9f8fe9ff87a3..000000000000 --- a/backend/python/bark/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -torchaudio==2.4.1+cu118 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/chatterbox/requirements-cublas11.txt b/backend/python/chatterbox/requirements-cublas11.txt deleted file mode 100644 index 7b89d89d65f4..000000000000 --- a/backend/python/chatterbox/requirements-cublas11.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.6.0+cu118 -torchaudio==2.6.0+cu118 -transformers==4.46.3 -numpy>=1.24.0,<1.26.0 -# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 -chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster -accelerate \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 9af6ca6736f5..eb55f43d9547 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# +# # use the library by adding the following line to a script: # source $(dirname $0)/../common/libbackend.sh # @@ -206,8 +206,8 @@ function init() { # getBuildProfile will inspect the system to determine which build profile is appropriate: # returns one of the following: -# - cublas11 # - cublas12 +# - cublas13 # - hipblas # - intel function getBuildProfile() { @@ -392,7 +392,7 @@ function runProtogen() { # - requirements-${BUILD_TYPE}.txt # - requirements-${BUILD_PROFILE}.txt # -# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12 +# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cublas12 or cublas13 # it can also include some options that we do not have BUILD_TYPES for, ex: intel # # NOTE: for BUILD_PROFILE==intel, this function does NOT automatically 
use the Intel python package index. diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt deleted file mode 100644 index 97e1ef0a4afe..000000000000 --- a/backend/python/coqui/requirements-cublas11.txt +++ /dev/null @@ -1,6 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -torchaudio==2.4.1+cu118 -transformers==4.48.3 -accelerate -coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt deleted file mode 100644 index 7b77f7f68693..000000000000 --- a/backend/python/diffusers/requirements-cublas11.txt +++ /dev/null @@ -1,12 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -git+https://github.com/huggingface/diffusers -opencv-python -transformers -torchvision==0.22.1 -accelerate -compel -peft -sentencepiece -torch==2.7.1 -optimum-quanto -ftfy \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt deleted file mode 100644 index 2d1958c75153..000000000000 --- a/backend/python/exllama2/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt deleted file mode 100644 index b74532957a4a..000000000000 --- a/backend/python/faster-whisper/requirements-cublas11.txt +++ /dev/null @@ -1,9 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -faster-whisper -opencv-python -accelerate -compel -peft -sentencepiece -optimum-quanto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt deleted file mode 100644 index 628933b5640a..000000000000 --- a/backend/python/kokoro/requirements-cublas11.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -torchaudio==2.7.1+cu118 -transformers -accelerate -kokoro -soundfile \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt deleted file mode 100644 index fef296fe8bb3..000000000000 --- a/backend/python/rerankers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -transformers -accelerate -torch==2.4.1+cu118 -rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-cublas11.txt b/backend/python/rfdetr/requirements-cublas11.txt deleted file mode 100644 index 14449b3d4b00..000000000000 --- a/backend/python/rfdetr/requirements-cublas11.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -rfdetr -opencv-python -accelerate -inference -peft -optimum-quanto \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt deleted file mode 100644 index 8402f001e3cd..000000000000 --- a/backend/python/transformers/requirements-cublas11.txt +++ /dev/null @@ -1,10 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -llvmlite==0.43.0 -numba==0.60.0 -accelerate -transformers -bitsandbytes -outetts 
-sentence-transformers==5.2.0 -protobuf==6.33.2 \ No newline at end of file diff --git a/backend/python/vibevoice/requirements-cublas11.txt b/backend/python/vibevoice/requirements-cublas11.txt deleted file mode 100644 index 547b198aa870..000000000000 --- a/backend/python/vibevoice/requirements-cublas11.txt +++ /dev/null @@ -1,22 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -git+https://github.com/huggingface/diffusers -opencv-python -transformers==4.51.3 -torchvision==0.22.1 -accelerate -compel -peft -sentencepiece -torch==2.7.1 -optimum-quanto -ftfy -llvmlite>=0.40.0 -numba>=0.57.0 -tqdm -numpy -scipy -librosa -ml-collections -absl-py -gradio -av \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11-after.txt b/backend/python/vllm/requirements-cublas11-after.txt deleted file mode 100644 index 7bfe8efeb555..000000000000 --- a/backend/python/vllm/requirements-cublas11-after.txt +++ /dev/null @@ -1 +0,0 @@ -flash-attn \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt deleted file mode 100644 index 95fdd7aea702..000000000000 --- a/backend/python/vllm/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -accelerate -torch==2.7.0+cu118 -transformers -bitsandbytes \ No newline at end of file diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md index 5f4db3929853..7ea98965de4f 100644 --- a/docs/content/getting-started/container-images.md +++ b/docs/content/getting-started/container-images.md @@ -50,16 +50,6 @@ Standard container images do not have pre-installed models. Use these if you wan {{% /tab %}} -{{% tab title="GPU Images CUDA 11" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` | - -{{% /tab %}} - {{% tab title="GPU Images CUDA 12" %}} | Description | Quay | Docker Hub | @@ -169,11 +159,9 @@ services: image: localai/localai:latest-aio-cpu # For a specific version: # image: localai/localai:{{< version >}}-aio-cpu - # For Nvidia GPUs decomment one of the following (cuda11, cuda12, or cuda13): - # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11 + # For Nvidia GPUs uncomment one of the following (cuda12 or cuda13): # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12 # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13 - # image: localai/localai:latest-aio-gpu-nvidia-cuda-11 # image: localai/localai:latest-aio-gpu-nvidia-cuda-12 # image: localai/localai:latest-aio-gpu-nvidia-cuda-13 healthcheck: @@ -225,7 +213,6 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo | --- | --- |-----------------------------------------------| | Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | | Versioned image (e.g. 
for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | -| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | | Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` | | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md index 1a3ea706c551..7cb354f98a8a 100644 --- a/docs/content/installation/docker.md +++ b/docs/content/installation/docker.md @@ -68,11 +68,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 ``` -**NVIDIA CUDA 11:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 -``` - **AMD GPU (ROCm):** ```bash docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas @@ -122,11 +117,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` -**NVIDIA CUDA 11:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 -``` - **AMD GPU (ROCm):** ```bash docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas diff --git a/docs/content/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md index b34b3d452711..97bc61313dc0 100644 --- a/docs/content/reference/compatibility-table.md +++ b/docs/content/reference/compatibility-table.md @@ -18,9 +18,9 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | +| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | | [vLLM](https://github.com/vllm-project/vllm) | Various GPTs and quantization formats | yes | GPT | no | no | CUDA 12/13, ROCm, Intel | -| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 11/12/13, 
ROCm, Intel, CPU | +| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 12/13, ROCm, Intel, CPU | | [exllama2](https://github.com/turboderp-org/exllamav2) | GPTQ | yes | GPT only | no | no | CUDA 12/13 | | [MLX](https://github.com/ml-explore/mlx-lm) | Various LLMs | yes | GPT | no | no | Metal (Apple Silicon) | | [MLX-VLM](https://github.com/Blaizzy/mlx-vlm) | Vision-Language Models | yes | Multimodal GPT | no | no | Metal (Apple Silicon) | @@ -37,7 +37,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | CUDA, Metal, CPU | | [coqui](https://github.com/idiap/coqui-ai-TTS) | Coqui TTS | no | Audio generation and Voice cloning | no | no | CUDA 12/13, ROCm, Intel, CPU | | [kokoro](https://github.com/hexgrad/kokoro) | Kokoro TTS | no | Text-to-speech | no | no | CUDA 12/13, ROCm, Intel, CPU | -| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12/13, CPU | +| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 12/13, CPU | | [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU | | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU | | [neutts](https://github.com/neuphonic/neuttsair) | NeuTTSAir | no | Text-to-speech with voice cloning | no | no | CUDA 12/13, ROCm, CPU | @@ -49,7 +49,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [stablediffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | CUDA 12/13, Intel SYCL, Vulkan, CPU | -| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 11/12/13, ROCm, Intel, Metal, CPU | +| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... 
| no | Image/Video generation | no | no | CUDA 12/13, ROCm, Intel, Metal, CPU | | [transformers-musicgen](https://github.com/huggingface/transformers) | MusicGen | no | Audio generation | no | no | CUDA, CPU | ## Specialized AI Tasks @@ -57,14 +57,14 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [rfdetr](https://github.com/roboflow/rf-detr) | RF-DETR | no | Object Detection | no | no | CUDA 12/13, Intel, CPU | -| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 11/12/13, ROCm, Intel, CPU | +| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 12/13, ROCm, Intel, CPU | | [local-store](https://github.com/mudler/LocalAI) | Vector database | no | Vector storage | yes | no | CPU | | [huggingface](https://huggingface.co/docs/hub/en/api) | HuggingFace API models | yes | Various AI tasks | yes | yes | API-based | ## Acceleration Support Summary ### GPU Acceleration -- **NVIDIA CUDA**: CUDA 11.7, CUDA 12.0, CUDA 13.0 support across most backends +- **NVIDIA CUDA**: CUDA 12.0, CUDA 13.0 support across most backends - **AMD ROCm**: HIP-based acceleration for AMD GPUs - **Intel oneAPI**: SYCL-based acceleration for Intel GPUs (F16/F32 precision) - **Vulkan**: Cross-platform GPU acceleration From 6d04e235b4fb58850b8a375952fa74beabed5f2c Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Tue, 6 Jan 2026 12:55:11 +0000 Subject: [PATCH 25/25] chore(build): Add Ubuntu codename as an argument Signed-off-by: Richard Palethorpe --- .github/workflows/image.yml | 11 +++++++++++ .github/workflows/image_build.yml | 7 +++++++ Dockerfile | 3 ++- Makefile | 8 ++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7f2a316634c2..3550113f531b 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -28,6 +28,7 @@ jobs: aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -46,6 +47,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" ubuntu-version: '2404' + ubuntu-codename: 'noble' core-image-build: uses: ./.github/workflows/image_build.yml @@ -63,6 +65,7 @@ jobs: makeflags: ${{ matrix.makeflags }} skip-drivers: ${{ matrix.skip-drivers }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -82,6 +85,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "9" @@ -94,6 +98,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ 
-106,6 +111,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-13" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -116,6 +122,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'intel' platforms: 'linux/amd64' tag-latest: 'auto' @@ -126,6 +133,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel" ubuntu-version: '2404' + ubuntu-codename: 'noble' gh-runner: uses: ./.github/workflows/image_build.yml @@ -143,6 +151,7 @@ jobs: makeflags: ${{ matrix.makeflags }} skip-drivers: ${{ matrix.skip-drivers }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -162,6 +171,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' ubuntu-version: "2404" + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -173,3 +183,4 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2404' + ubuntu-codename: 'noble' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 39cfa1401052..d72da8af03a4 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -61,6 +61,11 @@ on: required: false default: '2204' type: string + ubuntu-codename: + description: 'Ubuntu codename' + required: false + default: 'noble' + type: string secrets: dockerUsername: required: true @@ -244,6 +249,7 @@ jobs: MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} context: . file: ./Dockerfile cache-from: type=gha @@ -272,6 +278,7 @@ jobs: MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} context: . 
file: ./Dockerfile cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index fb35a9dfb0b7..4f1c125548f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} +ARG UBUNTU_CODENAME=noble FROM ${BASE_IMAGE} AS requirements @@ -224,7 +225,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${UBUNTU_CODENAME}/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Makefile b/Makefile index 040741f4c1f8..9eb59fefb8ba 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ LAUNCHER_BINARY_NAME=local-ai-launcher CUDA_MAJOR_VERSION?=13 CUDA_MINOR_VERSION?=0 UBUNTU_VERSION?=2204 +UBUNTU_CODENAME?=noble GORELEASER?= @@ -169,6 +170,7 @@ docker-build-aio: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ --build-arg GO_TAGS="$(GO_TAGS)" \ -t local-ai:tests -f Dockerfile . BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio @@ -199,6 +201,7 @@ prepare-e2e: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ -t localai-tests . @@ -335,6 +338,7 @@ docker: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE) . docker-cuda12: @@ -347,6 +351,7 @@ docker-cuda12: --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE)-cuda-12 . docker-aio: @@ -357,6 +362,7 @@ docker-aio: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . docker-aio-all: @@ -373,6 +379,7 @@ docker-image-intel: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE) . ######################################################## @@ -458,6 +465,7 @@ define docker-build-backend --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ $(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \ -t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3) endef
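
For reference, the dependency resolution that the requirements-cublas11.txt removals in PATCH 24/25 rely on can be condensed into a short sketch. It follows the lookup order documented in the backend/python/common/libbackend.sh comments above; the loop body and variable defaults here are illustrative assumptions, not the actual implementation:

```bash
# Sketch (assumed shape): how a backend's Python requirements are resolved.
# Only cublas12/cublas13 profile files remain after this series, so a host
# that previously matched cublas11 falls through to the base requirements.
BUILD_TYPE=${BUILD_TYPE:-cpu}                 # e.g. cublas, hipblas, intel
BUILD_PROFILE=${BUILD_PROFILE:-$BUILD_TYPE}   # e.g. cublas12, cublas13

for candidate in requirements.txt \
                 "requirements-${BUILD_TYPE}.txt" \
                 "requirements-${BUILD_PROFILE}.txt"; do
    if [ -f "$candidate" ]; then
        pip install -r "$candidate"
    fi
done
```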
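
The UBUNTU_CODENAME argument introduced in PATCH 25/25 flows from the workflow matrix through image_build.yml into the Dockerfile and Makefile. A minimal usage sketch follows; the local-ai:noble tag is illustrative, and the version/codename pair should describe the same Ubuntu release, since the codename is substituted into the Intel graphics apt source:

```bash
# Via the Makefile target touched above (DOCKER_IMAGE is the existing
# Makefile variable used as the image tag):
make docker UBUNTU_VERSION=2404 UBUNTU_CODENAME=noble DOCKER_IMAGE=local-ai:noble

# Direct equivalent; inside the Dockerfile the codename expands into
#   deb [...] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified
docker build \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --build-arg UBUNTU_VERSION=2404 \
  --build-arg UBUNTU_CODENAME=noble \
  -t local-ai:noble .
```

Since the Makefile defaults are UBUNTU_VERSION?=2204 and UBUNTU_CODENAME?=noble, passing a matching pair explicitly keeps the Intel apt source pointed at the release the base image actually ships.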