From c626fbce93c2d2df4d230427fa9ef9fa7688a3ff Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:23:07 +0100 Subject: [PATCH 01/25] ci(workflows): bump GitHub Actions images to Ubuntu 24.04 Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 330 ++++++++++++-------- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 10 +- .github/workflows/image.yml | 12 +- 5 files changed, 208 insertions(+), 148 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 66b1c683b88f..0c63091007e5 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -52,7 +52,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -65,7 +65,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,7 +78,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -91,7 +91,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -104,7 +104,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -112,12 +112,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -125,12 +125,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 11 additional backends - build-type: 'cublas' cuda-major-version: "11" @@ -144,7 +144,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -157,7 +157,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -170,7 +170,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -183,7 +183,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" 
cuda-minor-version: "7" @@ -196,7 +196,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -218,12 +218,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -231,12 +231,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -244,12 +244,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -257,12 +257,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -270,12 +270,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -283,12 +283,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -296,12 +296,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -309,12 +309,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -322,12 +322,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 
'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -335,12 +335,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -353,7 +353,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -366,7 +366,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -379,7 +379,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -392,7 +392,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -405,7 +405,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' cuda-major-version: "13" @@ -419,7 +419,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -445,7 +445,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -471,7 +471,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -484,7 +484,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -523,7 +523,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -536,7 +536,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -549,7 +549,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -562,7 +562,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: 
"./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -575,7 +575,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -601,7 +601,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -627,7 +627,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -636,12 +636,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -649,12 +649,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -662,12 +662,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -675,12 +675,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -688,12 +688,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' cuda-major-version: "" @@ -702,12 +702,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -728,12 +728,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: 
"rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -741,12 +741,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -754,12 +754,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # sycl builds - build-type: 'intel' cuda-major-version: "" @@ -773,7 +773,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -786,7 +786,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -799,7 +799,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -812,7 +812,7 @@ jobs: backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -825,7 +825,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -838,7 +838,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -864,7 +864,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' cuda-major-version: "" @@ -878,7 +878,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -891,7 +891,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -917,7 +917,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -930,7 +930,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # piper - build-type: '' 
cuda-major-version: "" @@ -939,12 +939,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "piper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # bark-cpp - build-type: '' cuda-major-version: "" @@ -953,12 +953,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -966,12 +966,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -984,7 +984,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -992,12 +992,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # Stablediffusion-ggml - build-type: '' cuda-major-version: "" @@ -1006,12 +1006,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1024,7 +1036,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1037,7 +1049,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1050,7 +1062,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1058,12 +1070,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 
'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1076,7 +1088,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # whisper - build-type: '' cuda-major-version: "" @@ -1085,12 +1097,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1103,7 +1127,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1116,7 +1140,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1129,7 +1153,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1137,12 +1161,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1155,20 +1179,20 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisper' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' #silero-vad - build-type: '' cuda-major-version: "" @@ -1177,12 +1201,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-silero-vad' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "silero-vad" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # local-store - build-type: '' cuda-major-version: "" @@ -1191,12 +1215,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-local-store' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "local-store" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # huggingface - build-type: '' cuda-major-version: "" @@ -1205,12 +1229,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-huggingface' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "huggingface" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # rfdetr - build-type: '' cuda-major-version: "" @@ -1219,12 +1243,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1237,7 +1273,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1250,7 +1286,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1263,7 +1299,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # exllama2 - build-type: '' cuda-major-version: "" @@ -1272,12 +1308,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1290,7 +1338,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1303,7 +1351,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1311,12 +1359,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1329,7 +1377,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' # cuda-major-version: "" @@ -1337,7 +1385,7 @@ jobs: # platforms: 'linux/amd64' # tag-latest: 'auto' # tag-suffix: '-gpu-hipblas-rfdetr' - 
# base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # base-image: "rocm/dev-ubuntu-24.04:6.4.4" # runs-on: 'ubuntu-latest' # skip-drivers: 'false' # backend: "rfdetr" @@ -1351,12 +1399,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-kitten-tts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # neutts - build-type: '' cuda-major-version: "" @@ -1365,12 +1413,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1378,12 +1438,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-neutts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1391,12 +1451,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-neutts' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -1404,12 +1464,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml strategy: diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index feadf0948bdc..72a2b306741d 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -16,7 +16,7 @@ jobs: strategy: matrix: include: - - grpc-base-image: ubuntu:22.04 + - grpc-base-image: ubuntu:24.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 5c0160addb38..0dc47da211ec 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 runs-on: 'arc-runner-set' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 84ffa5a1320c..055f26036cfd 100644 --- 
a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -42,7 +42,7 @@ jobs: tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' @@ -59,8 +59,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' @@ -68,7 +68,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" @@ -78,7 +78,7 @@ jobs: tag-latest: 'false' tag-suffix: '-vulkan-core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7389760912c5..8c67434f9d1a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -40,8 +40,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" @@ -76,7 +76,7 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" @@ -101,7 +101,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" @@ -123,7 +123,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" @@ -132,7 +132,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" From 168bd8ab7ed5755f6e5b79b4fb4aaf9892c0aa6c Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:27:37 +0100 Subject: [PATCH 02/25] ci(workflows): remove CUDA 11.x support from GitHub Actions (incompatible with ubuntu:24.04) Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 171 ---------------------------------- .github/workflows/image.yml | 12 --- 2 files changed, 183 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 0c63091007e5..63b9807add60 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -40,58 +40,6 @@ jobs: matrix: include: # CUDA 11 builds - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: 
'-gpu-nvidia-cuda-11-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -130,73 +78,6 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2404' - # CUDA 11 additional backends - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -1024,19 +905,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: 
"./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1115,19 +983,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1261,19 +1116,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1326,19 +1168,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8c67434f9d1a..ab69c98ac3c4 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -82,18 +82,6 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2204' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'false' - aio: "-aio-gpu-nvidia-cuda-11" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" From 39a5690083e8401789438bb945effa137eaa7d51 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:29:46 +0100 Subject: [PATCH 03/25] ci(workflows): bump GitHub Actions CUDA support to 12.9 Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 44 +++++++++++++++---------------- .github/workflows/image-pr.yml | 2 +- .github/workflows/image.yml | 4 +-- .github/workflows/image_build.yml | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 63b9807add60..be1d8c3eb0f5 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -42,7 +42,7 @@ jobs: # CUDA 11 builds - build-type: 'l4t' 
cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' @@ -81,7 +81,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vibevoice' @@ -107,7 +107,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -120,7 +120,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -133,7 +133,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -146,7 +146,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -159,7 +159,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -172,7 +172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -185,7 +185,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -198,7 +198,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -211,7 +211,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -722,7 +722,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' @@ -855,7 +855,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -895,7 +895,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -946,7 +946,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -973,7 +973,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + 
cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -1024,7 +1024,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1106,7 +1106,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -1158,7 +1158,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -1196,7 +1196,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1250,7 +1250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -1275,7 +1275,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 055f26036cfd..2db9e5cbafa4 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -37,7 +37,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ab69c98ac3c4..ad8ce97bcd4d 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -84,7 +84,7 @@ jobs: ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -153,7 +153,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 31a1f2310ea4..39cfa1401052 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -23,7 +23,7 @@ on: type: string cuda-minor-version: description: 'CUDA minor version' - default: "4" + default: "9" type: string platforms: description: 'Platforms' From 813f0e6184d10d5541ee45e908d318044ca15da6 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 15:00:22 +0100 Subject: [PATCH 04/25] build(docker): bump base image to ubuntu:24.04 and adjust Vulkan SDK/packages Signed-off-by: Alessandro Sturniolo --- Dockerfile | 38 +++++++++++++++++++++++--------- Dockerfile.aio | 2 +- Makefile | 2 +- backend/Dockerfile.golang | 42 ++++++++++++++++++++++++------------ backend/Dockerfile.llama-cpp | 34 ++++++++++++++++++++++------- backend/Dockerfile.python | 31 ++++++++++++++++++++------ docker-compose.yaml | 2 +- 7 files changed, 110 insertions(+), 41 deletions(-) diff --git a/Dockerfile b/Dockerfile index 28147e75b856..784c575387a2 
100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg && \ + ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -34,11 +34,30 @@ RUN < /run/localai/capability @@ -141,13 +160,12 @@ ENV PATH=/opt/rocm/bin:${PATH} # The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. FROM requirements-drivers AS build-requirements -ARG GO_VERSION=1.22.6 +ARG GO_VERSION=1.25.4 ARG CMAKE_VERSION=3.31.10 ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT - RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -206,7 +224,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Dockerfile.aio b/Dockerfile.aio index 81063bb4dbeb..ccc2fc94b9ed 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} diff --git a/Makefile b/Makefile index 6df349eb66d7..57c58f307970 100644 --- a/Makefile +++ b/Makefile @@ -318,7 +318,7 @@ test-extra: prepare-test-extra DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core -BASE_IMAGE?=ubuntu:22.04 +BASE_IMAGE?=ubuntu:24.04 docker: docker build \ diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 1db39c9e1d63..3dc0d8c92988 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} AS builder ARG BACKEND=rerankers @@ -12,8 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG GO_VERSION=1.22.6 -ARG UBUNTU_VERSION=2204 +ARG GO_VERSION=1.25.4 RUN apt-get update && \ apt-get install -y --no-install-recommends \ @@ -40,11 +39,30 @@ RUN < Date: Wed, 3 Dec 2025 19:39:44 +0100 Subject: [PATCH 05/25] fix(backend): correct context paths for Python backends in workflows, Makefile and Dockerfile Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 144 +++++++++++++++++----------------- backend/Dockerfile.python | 6 +- 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index be1d8c3eb0f5..2fd2dd504ac8 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -51,7 +51,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -64,7 +64,7 @@ jobs: 
skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./"" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -77,7 +77,7 @@ jobs: skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -103,7 +103,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -129,7 +129,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -142,7 +142,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -155,7 +155,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -168,7 +168,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -181,7 +181,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -194,7 +194,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -207,7 +207,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -220,7 +220,7 @@ jobs: skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -229,7 +229,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -242,7 +242,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -255,11 +255,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -268,11 +268,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: 
"./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -281,11 +281,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' @@ -295,11 +295,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -321,7 +321,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -347,11 +347,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -360,11 +360,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" @@ -391,7 +391,7 @@ jobs: ubuntu-version: '2404' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -399,11 +399,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -412,11 +412,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -425,11 +425,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -438,11 +438,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -451,7 +451,7 @@ jobs: tag-latest: 
'auto' tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -477,7 +477,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -503,11 +503,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' @@ -521,7 +521,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -547,7 +547,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -560,7 +560,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -573,7 +573,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' @@ -587,7 +587,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -613,7 +613,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -626,7 +626,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -639,7 +639,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # sycl builds - build-type: 'intel' @@ -653,7 +653,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" @@ -692,7 +692,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -705,7 +705,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -718,7 +718,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -744,7 +744,7 @@ jobs: skip-drivers: 'true' backend: "kokoro" dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' @@ -758,7 +758,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -771,7 +771,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -797,7 +797,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -810,7 +810,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # piper - build-type: '' @@ -1102,7 +1102,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1115,7 +1115,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1127,7 +1127,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1140,7 +1140,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # exllama2 - build-type: '' @@ -1154,7 +1154,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1167,7 +1167,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1179,7 +1179,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -1192,7 +1192,7 @@ jobs: runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1205,7 +1205,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' @@ -1219,7 +1219,7 @@ jobs: # skip-drivers: 'false' # backend: "rfdetr" # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" + # context: "./" # kitten-tts - build-type: '' cuda-major-version: "" @@ -1232,7 +1232,7 @@ jobs: skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # neutts - build-type: '' @@ -1246,7 +1246,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: 
'2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1259,7 +1259,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1271,7 +1271,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1284,7 +1284,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index ca589e84045a..b87f7e28762e 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -174,9 +174,9 @@ RUN < Date: Wed, 3 Dec 2025 19:41:55 +0100 Subject: [PATCH 06/25] chore(make): disable parallel backend builds to avoid race conditions Signed-off-by: Alessandro Sturniolo --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 57c58f307970..b2c7fa0df0e3 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Disable parallel execution for backend builds +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin + GOCMD=go GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet From 4f02f06d0d522a8d49ab87b0d27c2877d3b74228 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:46:47 +0100 Subject: [PATCH 07/25] chore(make): export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION for override Signed-off-by: Alessandro Sturniolo --- Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index b2c7fa0df0e3..dfa7921ade90 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,8 @@ UBUNTU_VERSION?=2204 GORELEASER?= export BUILD_TYPE?= +export CUDA_MAJOR_VERSION?=12 +export CUDA_MINOR_VERSION?=9 GO_TAGS?= BUILD_ID?= @@ -335,17 +337,17 @@ docker: --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ -t $(DOCKER_IMAGE) . -docker-cuda11: +docker-cuda12: docker build \ - --build-arg CUDA_MAJOR_VERSION=11 \ - --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \ + --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ - -t $(DOCKER_IMAGE)-cuda-11 . + -t $(DOCKER_IMAGE)-cuda-12 . 
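+
+# Illustrative usage sketch (version pairs mirror the CI matrix above, not an
+# exhaustive list): CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION are declared
+# earlier with `export ... ?=`, so either the environment or the make command
+# line can override the 12.9 defaults when invoking this target, e.g.
+#   make docker-cuda12 CUDA_MAJOR_VERSION=12 CUDA_MINOR_VERSION=9
+#   make docker-cuda12 CUDA_MAJOR_VERSION=13 CUDA_MINOR_VERSION=0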
docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" From 2d41ac3f559d4999c236cb1f23639bbe4c2b57c3 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:51:57 +0100 Subject: [PATCH 08/25] build(backend): update backend Dockerfiles to Ubuntu 24.04 Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.golang | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 3dc0d8c92988..c88074f3434e 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -162,6 +162,8 @@ EOT COPY . /LocalAI +RUN git config --global --add safe.directory /LocalAI + RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build FROM scratch From ccf588c6c1c2c702036540f7c2f00ee72242a079 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:55:31 +0100 Subject: [PATCH 09/25] chore(backend): add ROCm env vars and default AMDGPU_TARGETS for hipBLAS builds Signed-off-by: Alessandro Sturniolo --- backend/go/stablediffusion-ggml/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index e1bb3dea0c20..ee81fc75942b 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -28,7 +28,12 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON else ifeq ($(OS),Darwin) From 4850ea3da3e15d87c3e62add101fbfe2aa20ae8e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:56:06 +0100 Subject: [PATCH 10/25] chore(chatterbox): bump ROCm PyTorch to 2.9.1+rocm6.4 and update index URL; align hipblas requirements Signed-off-by: Alessandro Sturniolo --- backend/python/chatterbox/requirements-hipblas.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index 6c21992a7585..ed30fb824107 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.6.0+rocm6.1 -torchaudio==2.6.0+rocm6.1 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.9.1+rocm6.4 +torchaudio==2.9.1+rocm6.4 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 From 52403f772c8a0b34d0feb686bd14f021aee5d123 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 20:36:16 +0100 Subject: [PATCH 11/25] chore: add local-ai-launcher to .gitignore Signed-off-by: Alessandro Sturniolo --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index caae10a218a1..2ee2ab8588b1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ 
go-bert # LocalAI build binary LocalAI /local-ai +/local-ai-launcher # prevent above rules from omitting the helm chart !charts/* # prevent above rules from omitting the api/localai folder From 8c839543cc4db218b437a1a21af655fed79822f5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 21:27:29 +0100 Subject: [PATCH 12/25] ci(workflows): fix backends GitHub Actions workflows after rebase Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 80 +++++------------------------------ 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 2fd2dd504ac8..438908dc0414 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -39,7 +39,6 @@ jobs: #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} matrix: include: - # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "9" @@ -64,7 +63,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./"" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -78,6 +77,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./" + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -224,7 +224,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -237,7 +237,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -250,7 +250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -263,7 +263,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -276,7 +276,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -722,7 +722,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' @@ -893,18 +893,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -971,18 +959,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: 
'-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1104,18 +1080,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1156,18 +1120,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1196,7 +1148,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1248,18 +1200,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1275,7 +1215,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' From b0347d34d2a4e23bfa37ef760c5226cb5cf754cd Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:37:28 +0100 Subject: [PATCH 13/25] build(docker): use build-time UBUNTU_VERSION variable Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- backend/Dockerfile.golang | 1 + backend/Dockerfile.llama-cpp | 1 + backend/Dockerfile.python | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 784c575387a2..47c5c59c927e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT ENV BUILD_TYPE=${BUILD_TYPE} -ARG UBUNTU_VERSION=2204 +ARG UBUNTU_VERSION=2404 RUN mkdir -p /run/localai RUN echo "default" > /run/localai/capability diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index c88074f3434e..6de3f70509e9 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -13,6 +13,7 @@ ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 5800c320fbff..0ec4a8e2e26f 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -70,6 +70,7 @@ ENV 
DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT ARG GO_VERSION=1.25.4 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index b87f7e28762e..cfc4d3b60bac 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -12,7 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG UBUNTU_VERSION=2204 +ARG UBUNTU_VERSION=2404 RUN apt-get update && \ apt-get install -y --no-install-recommends \ From 6a84969cb796f04edcdd2122647c62ecd27eac65 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:38:54 +0100 Subject: [PATCH 14/25] chore(docker): remove libquadmath0 from requirements-stage base image Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 47c5c59c927e..3bba3bf34e17 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ + ffmpeg libopenblas0 libopenblas-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* From 72e4635763440fe456b9bd50b5fbdbdcfade2ff0 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:39:56 +0100 Subject: [PATCH 15/25] chore(make): add backends/vllm to .NOTPARALLEL to prevent parallel builds Signed-off-by: Alessandro Sturniolo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dfa7921ade90..a50505c7513d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Disable parallel execution for backend builds -.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm GOCMD=go GOTEST=$(GOCMD) test From b8b4994cca6ee22490d01a238dcce8497ea27273 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 9 Dec 2025 20:41:26 +0100 Subject: [PATCH 16/25] fix(docker): correct CUDA installation steps in backend Dockerfiles Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.golang | 8 ++++++-- backend/Dockerfile.llama-cpp | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 6de3f70509e9..4fc3e0ee680c 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -76,10 +76,14 @@ RUN < Date: Tue, 9 Dec 2025 20:42:56 +0100 Subject: [PATCH 17/25] chore(backend): update ROCm to 6.4 and align 
Python hipblas requirements Signed-off-by: Alessandro Sturniolo --- backend/python/bark/requirements-hipblas.txt | 6 +++--- backend/python/common/template/requirements-hipblas.txt | 2 +- backend/python/coqui/requirements-hipblas.txt | 6 +++--- backend/python/diffusers/requirements-hipblas.txt | 6 +++--- backend/python/faster-whisper/requirements-hipblas.txt | 2 +- backend/python/kokoro/requirements-hipblas.txt | 6 +++--- backend/python/neutts/requirements-hipblas.txt | 4 ++-- backend/python/rerankers/requirements-hipblas.txt | 4 ++-- backend/python/rfdetr/requirements-hipblas.txt | 6 +++--- backend/python/transformers/requirements-hipblas.txt | 4 ++-- backend/python/vllm/requirements-hipblas.txt | 2 +- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt index 1d54fb165974..4e1fef6cfaa6 100644 --- a/backend/python/bark/requirements-hipblas.txt +++ b/backend/python/bark/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -torchaudio==2.4.1+rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers accelerate \ No newline at end of file diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt index 76018445f448..b733ec7b148b 100644 --- a/backend/python/common/template/requirements-hipblas.txt +++ b/backend/python/common/template/requirements-hipblas.txt @@ -1,2 +1,2 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt index 55cdcdddb845..8e7d034591e3 100644 --- a/backend/python/coqui/requirements-hipblas.txt +++ b/backend/python/coqui/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.4.1+rocm6.0 -torchaudio==2.4.1+rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers==4.48.3 accelerate coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt index aeea375639a0..b1f8b3e048c5 100644 --- a/backend/python/diffusers/requirements-hipblas.txt +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 git+https://github.com/huggingface/diffusers opencv-python transformers diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt index 29413f0508b3..da9c9123c0d7 100644 --- a/backend/python/faster-whisper/requirements-hipblas.txt +++ b/backend/python/faster-whisper/requirements-hipblas.txt @@ -1,3 +1,3 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 torch faster-whisper \ No newline at end of file diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt index 1226d917447b..74262df5c3ce 100644 --- 
a/backend/python/kokoro/requirements-hipblas.txt +++ b/backend/python/kokoro/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchaudio==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchaudio==2.8.0+rocm6.4 transformers accelerate kokoro diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt index 012d3c8bf6f5..72d11e059817 100644 --- a/backend/python/neutts/requirements-hipblas.txt +++ b/backend/python/neutts/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.8.0+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 transformers==4.56.1 accelerate librosa==0.11.0 diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt index b1c8baeddfe9..7a72b3d0650f 100644 --- a/backend/python/rerankers/requirements-hipblas.txt +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 transformers accelerate -torch==2.4.1+rocm6.0 +torch==2.8.0+rocm6.4 rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt index 536a31efb509..884cfba7be46 100644 --- a/backend/python/rfdetr/requirements-hipblas.txt +++ b/backend/python/rfdetr/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 -torchvision==0.22.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 +torchvision==0.23.0+rocm6.4 rfdetr opencv-python accelerate diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt index 732a3adfcdc4..59f99e3643fa 100644 --- a/backend/python/transformers/requirements-hipblas.txt +++ b/backend/python/transformers/requirements-hipblas.txt @@ -1,5 +1,5 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.3 -torch==2.7.1+rocm6.3 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.8.0+rocm6.4 accelerate transformers llvmlite==0.43.0 diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt index 3a65e0d7c907..db732bc864ef 100644 --- a/backend/python/vllm/requirements-hipblas.txt +++ b/backend/python/vllm/requirements-hipblas.txt @@ -1,4 +1,4 @@ ---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.3 +--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4 accelerate torch transformers From 05c983624e09e8aa08cf1b6e9c1449bb6c9ba7d5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Fri, 12 Dec 2025 13:02:33 +0100 Subject: [PATCH 18/25] ci(workflows): switch GitHub Actions runners to Ubuntu-24.04 for CUDA on arm64 builds Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 14 +++++++------- .github/workflows/image.yml | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 438908dc0414..42fc4c9fef2a 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -46,7 +46,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' runs-on: 'ubuntu-24.04-arm' - 
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" @@ -740,7 +740,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" @@ -860,7 +860,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -939,7 +939,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -1005,7 +1005,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-whisper' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -1100,7 +1100,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-rfdetr' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" @@ -1153,7 +1153,7 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-chatterbox' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ad8ce97bcd4d..a97ac420e022 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -157,11 +157,11 @@ jobs: platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' - ubuntu-version: "2204" + ubuntu-version: "2404" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" From 5254fdd91b94f1ef58d335c688a39d4540b7ed8e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Fri, 12 Dec 2025 13:09:16 +0100 Subject: [PATCH 19/25] build(docker): update base image and backend Dockerfiles for Ubuntu 24.04 compatibility on arm64 Signed-off-by: Alessandro Sturniolo --- Dockerfile | 2 +- backend/Dockerfile.golang | 8 ++------ backend/Dockerfile.llama-cpp | 8 ++------ backend/Dockerfile.python | 8 ++------ 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3bba3bf34e17..fb35a9dfb0b7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -90,7 +90,7 @@ RUN < Date: Fri, 12 Dec 2025 13:10:29 +0100 Subject: [PATCH 20/25] build(backend): increase timeout for uv installs behind slow networks on backend/Dockerfile.python Signed-off-by: Alessandro Sturniolo --- backend/Dockerfile.python | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index a6e59e4def92..1a1c43b1822f 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -157,7 +157,8 @@ RUN if [ "${BUILD_TYPE}" = "hipblas" ]; 
then \ # Install uv as a system package RUN curl -LsSf https://astral.sh/uv/install.sh | UV_INSTALL_DIR=/usr/bin sh ENV PATH="/root/.cargo/bin:${PATH}" - +# Increase timeout for uv installs behind slow networks +ENV UV_HTTP_TIMEOUT=180 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y # Install grpcio-tools (the version in 22.04 is too old) From cf4c4885a9f5c69dc66ec054eef914dbb0926480 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Sat, 13 Dec 2025 18:22:47 +0100 Subject: [PATCH 21/25] ci(workflows): switch GitHub Actions runners to Ubuntu-24.04 for vibevoice backend Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 42fc4c9fef2a..a4f08932b5ea 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -86,12 +86,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -308,12 +308,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-vibevoice' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -378,7 +378,7 @@ jobs: ubuntu-version: '2404' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -596,12 +596,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vibevoice' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -727,12 +727,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-nvidia-l4t-vibevoice' runs-on: 'ubuntu-24.04-arm' - base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -784,8 +784,8 @@ jobs: skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2204' + context: "./" + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" From 544c51aa9bf0b2321dc2a5650cb4a98fdc0cd76b Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Tue, 16 Dec 2025 15:42:46 +0100 Subject: [PATCH 22/25] ci(workflows): fix failing GitHub Actions runners Signed-off-by: Alessandro Sturniolo --- .github/workflows/backend.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/backend.yml 
b/.github/workflows/backend.yml index a4f08932b5ea..9c24ca46fb95 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -857,7 +857,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-llama-cpp' base-image: "ubuntu:24.04" @@ -936,7 +936,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml' base-image: "ubuntu:24.04" @@ -1002,7 +1002,7 @@ jobs: cuda-major-version: "12" cuda-minor-version: "9" platforms: 'linux/arm64' - skip-drivers: 'true' + skip-drivers: 'false' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64-whisper' base-image: "ubuntu:24.04" @@ -1237,7 +1237,7 @@ jobs: skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml From 0710001db1d4b8e8d62e10e8eab3f0f3cbdf17d7 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Mon, 29 Dec 2025 11:42:19 +0000 Subject: [PATCH 23/25] fix: Allow FROM_SOURCE to be unset, use upstream Intel images etc. Signed-off-by: Richard Palethorpe --- .github/workflows/backend.yml | 34 ++++++++++----------- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 14 ++++----- .github/workflows/image.yml | 16 +++++----- Makefile | 2 +- backend/python/vllm/install.sh | 2 +- 6 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 9c24ca46fb95..8b6c38b9c7ca 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -649,7 +649,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-rerankers' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" @@ -662,7 +662,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -675,7 +675,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -688,7 +688,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-vllm' runs-on: 'arc-runner-set' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" @@ -701,7 +701,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-transformers' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" @@ -714,7 +714,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-diffusers' runs-on: 
'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" @@ -754,7 +754,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-kokoro' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" @@ -767,7 +767,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" @@ -780,7 +780,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-vibevoice' runs-on: 'arc-runner-set' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "vibevoice" dockerfile: "./backend/Dockerfile.python" @@ -793,7 +793,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-coqui' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" @@ -806,7 +806,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-bark' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" @@ -900,7 +900,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -913,7 +913,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -966,7 +966,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f32-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -979,7 +979,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-sycl-f16-whisper' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -1087,7 +1087,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-rfdetr' runs-on: 'ubuntu-latest' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" @@ -1127,7 +1127,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-intel-exllama2' runs-on: 'ubuntu-latest' - base-image: 
"quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 0dc47da211ec..c417ceeb8dbd 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -53,7 +53,7 @@ jobs: BASE_IMAGE=${{ matrix.base-image }} context: . file: ./Dockerfile - tags: quay.io/go-skynet/intel-oneapi-base:latest + tags: quay.io/go-skynet/intel-oneapi-base:24.04 push: true target: intel platforms: ${{ matrix.platforms }} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 2db9e5cbafa4..9b7b9ec97b0b 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -44,7 +44,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -54,7 +54,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:22.04" makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' platforms: 'linux/amd64' tag-latest: 'false' @@ -63,16 +63,16 @@ jobs: grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl' platforms: 'linux/amd64' tag-latest: 'false' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'false' @@ -80,7 +80,7 @@ jobs: runs-on: 'ubuntu-latest' base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -91,4 +91,4 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2404' \ No newline at end of file + ubuntu-version: '2404' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index a97ac420e022..7f2a316634c2 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -45,7 +45,7 @@ jobs: runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" - ubuntu-version: '2204' + ubuntu-version: '2404' core-image-build: uses: ./.github/workflows/image_build.yml @@ -81,7 +81,7 @@ jobs: aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "9" @@ -93,7 +93,7 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -105,7 +105,7 @@ jobs: skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-13" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -115,17 +115,17 @@ jobs: skip-drivers: 
'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' platforms: 'linux/amd64' tag-latest: 'auto' - base-image: "quay.io/go-skynet/intel-oneapi-base:latest" + base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04" grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel" - ubuntu-version: '2204' + ubuntu-version: '2404' gh-runner: uses: ./.github/workflows/image_build.yml @@ -172,4 +172,4 @@ jobs: runs-on: 'ubuntu-24.04-arm' makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' - ubuntu-version: '2404' \ No newline at end of file + ubuntu-version: '2404' diff --git a/Makefile b/Makefile index a50505c7513d..040741f4c1f8 100644 --- a/Makefile +++ b/Makefile @@ -365,7 +365,7 @@ docker-aio-all: docker-image-intel: docker build \ - --build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \ + --build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh index 364ff7a41b99..7dcd29db4a92 100755 --- a/backend/python/vllm/install.sh +++ b/backend/python/vllm/install.sh @@ -28,7 +28,7 @@ fi # We don't embed this into the images as it is a large dependency and not always needed. # Besides, the speed inference are not actually usable in the current state for production use-cases. -if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then +if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then ensureVenv # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html if [ ! 
-d vllm ]; then From 4198530a1f44f463b653760ebe9e812a17c3fd99 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Mon, 5 Jan 2026 15:06:53 +0000 Subject: [PATCH 24/25] chore(build): rm all traces of CUDA 11 Signed-off-by: Richard Palethorpe --- README.md | 17 +-- backend/README.md | 7 +- backend/index.yaml | 115 ------------------ backend/python/README.md | 4 +- backend/python/bark/requirements-cublas11.txt | 5 - .../chatterbox/requirements-cublas11.txt | 8 -- backend/python/common/libbackend.sh | 6 +- .../python/coqui/requirements-cublas11.txt | 6 - .../diffusers/requirements-cublas11.txt | 12 -- .../python/exllama2/requirements-cublas11.txt | 4 - .../faster-whisper/requirements-cublas11.txt | 9 -- .../python/kokoro/requirements-cublas11.txt | 7 -- .../rerankers/requirements-cublas11.txt | 5 - .../python/rfdetr/requirements-cublas11.txt | 8 -- .../transformers/requirements-cublas11.txt | 10 -- .../vibevoice/requirements-cublas11.txt | 22 ---- .../vllm/requirements-cublas11-after.txt | 1 - backend/python/vllm/requirements-cublas11.txt | 5 - .../getting-started/container-images.md | 15 +-- docs/content/installation/docker.md | 10 -- docs/content/reference/compatibility-table.md | 12 +- 21 files changed, 20 insertions(+), 268 deletions(-) delete mode 100644 backend/python/bark/requirements-cublas11.txt delete mode 100644 backend/python/chatterbox/requirements-cublas11.txt delete mode 100644 backend/python/coqui/requirements-cublas11.txt delete mode 100644 backend/python/diffusers/requirements-cublas11.txt delete mode 100644 backend/python/exllama2/requirements-cublas11.txt delete mode 100644 backend/python/faster-whisper/requirements-cublas11.txt delete mode 100644 backend/python/kokoro/requirements-cublas11.txt delete mode 100644 backend/python/rerankers/requirements-cublas11.txt delete mode 100644 backend/python/rfdetr/requirements-cublas11.txt delete mode 100644 backend/python/transformers/requirements-cublas11.txt delete mode 100644 backend/python/vibevoice/requirements-cublas11.txt delete mode 100644 backend/python/vllm/requirements-cublas11-after.txt delete mode 100644 backend/python/vllm/requirements-cublas11.txt diff --git a/README.md b/README.md index b3993940ed1e..092432241837 100644 --- a/README.md +++ b/README.md @@ -152,9 +152,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp # CUDA 12.0 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 -# CUDA 11.7 -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 - # NVIDIA Jetson (L4T) ARM64 # CUDA 12 (for Nvidia AGX Orin and similar platforms) docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64 @@ -193,9 +190,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai # NVIDIA CUDA 12 version docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 -# NVIDIA CUDA 11 version -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 - # Intel GPU version docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel @@ -279,9 +273,9 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration ### Text Generation & Language Models | Backend | Description | Acceleration Support | |---------|-------------|---------------------| -| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, 
CPU | +| **llama.cpp** | LLM inference in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | | **vLLM** | Fast LLM inference with PagedAttention | CUDA 12/13, ROCm, Intel | -| **transformers** | HuggingFace transformers framework | CUDA 11/12/13, ROCm, Intel, CPU | +| **transformers** | HuggingFace transformers framework | CUDA 12/13, ROCm, Intel, CPU | | **exllama2** | GPTQ inference library | CUDA 12/13 | | **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) | | **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) | @@ -295,7 +289,7 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU | | **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU | | **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU | -| **chatterbox** | Production-grade TTS | CUDA 11/12/13, CPU | +| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU | | **piper** | Fast neural TTS system | CPU | | **kitten-tts** | Kitten TTS models | CPU | | **silero-vad** | Voice Activity Detection | CPU | @@ -306,13 +300,13 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | Backend | Description | Acceleration Support | |---------|-------------|---------------------| | **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12/13, Intel SYCL, Vulkan, CPU | -| **diffusers** | HuggingFace diffusion models | CUDA 11/12/13, ROCm, Intel, Metal, CPU | +| **diffusers** | HuggingFace diffusion models | CUDA 12/13, ROCm, Intel, Metal, CPU | ### Specialized AI Tasks | Backend | Description | Acceleration Support | |---------|-------------|---------------------| | **rfdetr** | Real-time object detection | CUDA 12/13, Intel, CPU | -| **rerankers** | Document reranking API | CUDA 11/12/13, ROCm, Intel, CPU | +| **rerankers** | Document reranking API | CUDA 12/13, ROCm, Intel, CPU | | **local-store** | Vector database | CPU | | **huggingface** | HuggingFace API integration | API-based | @@ -320,7 +314,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration | Acceleration Type | Supported Backends | Hardware Support | |-------------------|-------------------|------------------| -| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware | | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware | | **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware | | **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice | AMD Graphics | diff --git a/backend/README.md b/backend/README.md index 87fd9f28f89c..3f3076c046f1 100644 --- a/backend/README.md +++ b/backend/README.md @@ -65,7 +65,7 @@ The backend system provides language-specific Dockerfiles that handle the build ## Hardware Acceleration Support ### CUDA (NVIDIA) -- **Versions**: CUDA 11.x, 12.x +- **Versions**: CUDA 12.x, 13.x - **Features**: cuBLAS, cuDNN, TensorRT optimization - **Targets**: x86_64, ARM64 (Jetson) @@ -132,8 +132,7 @@ For ARM64/Mac builds, docker can't be used, and the makefile in the respective b ### Build Types - **`cpu`**: CPU-only optimization -- **`cublas11`**: CUDA 11.x with cuBLAS -- **`cublas12`**: CUDA 12.x with cuBLAS +- **`cublas12`**, **`cublas13`**: CUDA 12.x, 13.x with cuBLAS - **`hipblas`**: ROCm with rocBLAS - **`intel`**: Intel oneAPI optimization - **`vulkan`**: Vulkan-based acceleration @@ 
-210,4 +209,4 @@ When contributing to the backend system: 2. **Add Tests**: Include comprehensive test coverage 3. **Document**: Provide clear usage examples 4. **Optimize**: Consider performance and resource usage -5. **Validate**: Test across different hardware targets \ No newline at end of file +5. **Validate**: Test across different hardware targets diff --git a/backend/index.yaml b/backend/index.yaml index a383a15fdf3d..1f8c1f7fb4df 100644 --- a/backend/index.yaml +++ b/backend/index.yaml @@ -634,11 +634,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-llama-cpp" mirrors: - localai/localai-backends:master-cpu-llama-cpp -- !!merge <<: *llamacpp - name: "cuda11-llama-cpp" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-llama-cpp" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-llama-cpp - !!merge <<: *llamacpp name: "cuda12-llama-cpp" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp" @@ -679,11 +674,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-llama-cpp" mirrors: - localai/localai-backends:master-metal-darwin-arm64-llama-cpp -- !!merge <<: *llamacpp - name: "cuda11-llama-cpp-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-llama-cpp" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-llama-cpp - !!merge <<: *llamacpp name: "cuda12-llama-cpp-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-llama-cpp" @@ -755,11 +745,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-cpu-whisper" mirrors: - localai/localai-backends:master-cpu-whisper -- !!merge <<: *whispercpp - name: "cuda11-whisper" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-whisper" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-whisper - !!merge <<: *whispercpp name: "cuda12-whisper" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-whisper" @@ -800,11 +785,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-whisper" mirrors: - localai/localai-backends:master-metal-darwin-arm64-whisper -- !!merge <<: *whispercpp - name: "cuda11-whisper-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-whisper" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-whisper - !!merge <<: *whispercpp name: "cuda12-whisper-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-whisper" @@ -879,11 +859,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml" mirrors: - localai/localai-backends:latest-gpu-intel-sycl-f16-stablediffusion-ggml -- !!merge <<: *stablediffusionggml - name: "cuda11-stablediffusion-ggml" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "cuda12-stablediffusion-ggml-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-stablediffusion-ggml" @@ -899,11 +874,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml" mirrors: - localai/localai-backends:master-gpu-intel-sycl-f16-stablediffusion-ggml -- !!merge <<: *stablediffusionggml - name: "cuda11-stablediffusion-ggml-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml" - mirrors: - - 
localai/localai-backends:master-gpu-nvidia-cuda-11-stablediffusion-ggml - !!merge <<: *stablediffusionggml name: "nvidia-l4t-arm64-stablediffusion-ggml-development" uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-arm64-stablediffusion-ggml" @@ -1054,11 +1024,6 @@ intel: "intel-rerankers-development" amd: "rocm-rerankers-development" nvidia-cuda-13: "cuda13-rerankers-development" -- !!merge <<: *rerankers - name: "cuda11-rerankers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-rerankers - !!merge <<: *rerankers name: "cuda12-rerankers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers" @@ -1074,11 +1039,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-rerankers -- !!merge <<: *rerankers - name: "cuda11-rerankers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-rerankers - !!merge <<: *rerankers name: "cuda12-rerankers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers" @@ -1127,16 +1087,6 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-transformers" mirrors: - localai/localai-backends:latest-gpu-intel-transformers -- !!merge <<: *transformers - name: "cuda11-transformers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-transformers -- !!merge <<: *transformers - name: "cuda11-transformers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-transformers - !!merge <<: *transformers name: "cuda12-transformers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers" @@ -1213,21 +1163,11 @@ uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers" mirrors: - localai/localai-backends:latest-gpu-rocm-hipblas-diffusers -- !!merge <<: *diffusers - name: "cuda11-diffusers" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-diffusers - !!merge <<: *diffusers name: "intel-diffusers" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-diffusers" mirrors: - localai/localai-backends:latest-gpu-intel-diffusers -- !!merge <<: *diffusers - name: "cuda11-diffusers-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-diffusers - !!merge <<: *diffusers name: "cuda12-diffusers-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers" @@ -1269,21 +1209,11 @@ capabilities: nvidia: "cuda12-exllama2-development" intel: "intel-exllama2-development" -- !!merge <<: *exllama2 - name: "cuda11-exllama2" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-exllama2 - !!merge <<: *exllama2 name: "cuda12-exllama2" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-exllama2 -- !!merge <<: *exllama2 - name: "cuda11-exllama2-development" - uri: 
"quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-exllama2 - !!merge <<: *exllama2 name: "cuda12-exllama2-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2" @@ -1297,11 +1227,6 @@ intel: "intel-kokoro-development" amd: "rocm-kokoro-development" nvidia-l4t: "nvidia-l4t-kokoro-development" -- !!merge <<: *kokoro - name: "cuda11-kokoro-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-kokoro - !!merge <<: *kokoro name: "cuda12-kokoro-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-kokoro" @@ -1332,11 +1257,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-kokoro" mirrors: - localai/localai-backends:master-nvidia-l4t-kokoro -- !!merge <<: *kokoro - name: "cuda11-kokoro" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-kokoro" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-kokoro - !!merge <<: *kokoro name: "cuda12-kokoro" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-kokoro" @@ -1365,11 +1285,6 @@ intel: "intel-faster-whisper-development" amd: "rocm-faster-whisper-development" nvidia-cuda-13: "cuda13-faster-whisper-development" -- !!merge <<: *faster-whisper - name: "cuda11-faster-whisper" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-faster-whisper" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-faster-whisper - !!merge <<: *faster-whisper name: "cuda12-faster-whisper-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-faster-whisper" @@ -1408,21 +1323,11 @@ nvidia: "cuda12-coqui-development" intel: "intel-coqui-development" amd: "rocm-coqui-development" -- !!merge <<: *coqui - name: "cuda11-coqui" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-coqui" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-coqui - !!merge <<: *coqui name: "cuda12-coqui" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-coqui" mirrors: - localai/localai-backends:latest-gpu-nvidia-cuda-12-coqui -- !!merge <<: *coqui - name: "cuda11-coqui-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-coqui - !!merge <<: *coqui name: "cuda12-coqui-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-coqui" @@ -1455,16 +1360,6 @@ nvidia: "cuda12-bark-development" intel: "intel-bark-development" amd: "rocm-bark-development" -- !!merge <<: *bark - name: "cuda11-bark-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-bark -- !!merge <<: *bark - name: "cuda11-bark" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-bark" - mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-bark - !!merge <<: *bark name: "rocm-bark-development" uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-bark" @@ -1546,16 +1441,6 @@ uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox" mirrors: - localai/localai-backends:master-gpu-nvidia-cuda-12-chatterbox -- !!merge <<: *chatterbox - name: "cuda11-chatterbox" - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-chatterbox" - 
mirrors: - - localai/localai-backends:latest-gpu-nvidia-cuda-11-chatterbox -- !!merge <<: *chatterbox - name: "cuda11-chatterbox-development" - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox" - mirrors: - - localai/localai-backends:master-gpu-nvidia-cuda-11-chatterbox - !!merge <<: *chatterbox name: "cuda12-chatterbox" uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-chatterbox" diff --git a/backend/python/README.md b/backend/python/README.md index 8c76593467f8..9f894b77b596 100644 --- a/backend/python/README.md +++ b/backend/python/README.md @@ -85,7 +85,7 @@ runUnittests The build system automatically detects and configures for different hardware: - **CPU** - Standard CPU-only builds -- **CUDA** - NVIDIA GPU acceleration (supports CUDA 11/12) +- **CUDA** - NVIDIA GPU acceleration (supports CUDA 12/13) - **Intel** - Intel XPU/GPU optimization - **MLX** - Apple Silicon (M1/M2/M3) optimization - **HIP** - AMD GPU acceleration @@ -95,8 +95,8 @@ The build system automatically detects and configures for different hardware: Backends can specify hardware-specific dependencies: - `requirements.txt` - Base requirements - `requirements-cpu.txt` - CPU-specific packages -- `requirements-cublas11.txt` - CUDA 11 packages - `requirements-cublas12.txt` - CUDA 12 packages +- `requirements-cublas13.txt` - CUDA 13 packages - `requirements-intel.txt` - Intel-optimized packages - `requirements-mps.txt` - Apple Silicon packages diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt deleted file mode 100644 index 9f8fe9ff87a3..000000000000 --- a/backend/python/bark/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -torchaudio==2.4.1+cu118 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/chatterbox/requirements-cublas11.txt b/backend/python/chatterbox/requirements-cublas11.txt deleted file mode 100644 index 7b89d89d65f4..000000000000 --- a/backend/python/chatterbox/requirements-cublas11.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.6.0+cu118 -torchaudio==2.6.0+cu118 -transformers==4.46.3 -numpy>=1.24.0,<1.26.0 -# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 -chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster -accelerate \ No newline at end of file diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh index 9af6ca6736f5..eb55f43d9547 100644 --- a/backend/python/common/libbackend.sh +++ b/backend/python/common/libbackend.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -euo pipefail -# +# # use the library by adding the following line to a script: # source $(dirname $0)/../common/libbackend.sh # @@ -206,8 +206,8 @@ function init() { # getBuildProfile will inspect the system to determine which build profile is appropriate: # returns one of the following: -# - cublas11 # - cublas12 +# - cublas13 # - hipblas # - intel function getBuildProfile() { @@ -392,7 +392,7 @@ function runProtogen() { # - requirements-${BUILD_TYPE}.txt # - requirements-${BUILD_PROFILE}.txt # -# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12 +# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cublas12 or cublas13 # it can also include some options that we do not have BUILD_TYPES for, ex: intel # # NOTE: for BUILD_PROFILE==intel, this function does NOT automatically 
use the Intel python package index. diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt deleted file mode 100644 index 97e1ef0a4afe..000000000000 --- a/backend/python/coqui/requirements-cublas11.txt +++ /dev/null @@ -1,6 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -torchaudio==2.4.1+cu118 -transformers==4.48.3 -accelerate -coqui-tts \ No newline at end of file diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt deleted file mode 100644 index 7b77f7f68693..000000000000 --- a/backend/python/diffusers/requirements-cublas11.txt +++ /dev/null @@ -1,12 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -git+https://github.com/huggingface/diffusers -opencv-python -transformers -torchvision==0.22.1 -accelerate -compel -peft -sentencepiece -torch==2.7.1 -optimum-quanto -ftfy \ No newline at end of file diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt deleted file mode 100644 index 2d1958c75153..000000000000 --- a/backend/python/exllama2/requirements-cublas11.txt +++ /dev/null @@ -1,4 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -transformers -accelerate \ No newline at end of file diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt deleted file mode 100644 index b74532957a4a..000000000000 --- a/backend/python/faster-whisper/requirements-cublas11.txt +++ /dev/null @@ -1,9 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.4.1+cu118 -faster-whisper -opencv-python -accelerate -compel -peft -sentencepiece -optimum-quanto \ No newline at end of file diff --git a/backend/python/kokoro/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt deleted file mode 100644 index 628933b5640a..000000000000 --- a/backend/python/kokoro/requirements-cublas11.txt +++ /dev/null @@ -1,7 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -torchaudio==2.7.1+cu118 -transformers -accelerate -kokoro -soundfile \ No newline at end of file diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt deleted file mode 100644 index fef296fe8bb3..000000000000 --- a/backend/python/rerankers/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -transformers -accelerate -torch==2.4.1+cu118 -rerankers[transformers] \ No newline at end of file diff --git a/backend/python/rfdetr/requirements-cublas11.txt b/backend/python/rfdetr/requirements-cublas11.txt deleted file mode 100644 index 14449b3d4b00..000000000000 --- a/backend/python/rfdetr/requirements-cublas11.txt +++ /dev/null @@ -1,8 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -rfdetr -opencv-python -accelerate -inference -peft -optimum-quanto \ No newline at end of file diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt deleted file mode 100644 index 8402f001e3cd..000000000000 --- a/backend/python/transformers/requirements-cublas11.txt +++ /dev/null @@ -1,10 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -torch==2.7.1+cu118 -llvmlite==0.43.0 -numba==0.60.0 -accelerate -transformers -bitsandbytes -outetts 
-sentence-transformers==5.2.0 -protobuf==6.33.2 \ No newline at end of file diff --git a/backend/python/vibevoice/requirements-cublas11.txt b/backend/python/vibevoice/requirements-cublas11.txt deleted file mode 100644 index 547b198aa870..000000000000 --- a/backend/python/vibevoice/requirements-cublas11.txt +++ /dev/null @@ -1,22 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -git+https://github.com/huggingface/diffusers -opencv-python -transformers==4.51.3 -torchvision==0.22.1 -accelerate -compel -peft -sentencepiece -torch==2.7.1 -optimum-quanto -ftfy -llvmlite>=0.40.0 -numba>=0.57.0 -tqdm -numpy -scipy -librosa -ml-collections -absl-py -gradio -av \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11-after.txt b/backend/python/vllm/requirements-cublas11-after.txt deleted file mode 100644 index 7bfe8efeb555..000000000000 --- a/backend/python/vllm/requirements-cublas11-after.txt +++ /dev/null @@ -1 +0,0 @@ -flash-attn \ No newline at end of file diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt deleted file mode 100644 index 95fdd7aea702..000000000000 --- a/backend/python/vllm/requirements-cublas11.txt +++ /dev/null @@ -1,5 +0,0 @@ ---extra-index-url https://download.pytorch.org/whl/cu118 -accelerate -torch==2.7.0+cu118 -transformers -bitsandbytes \ No newline at end of file diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md index 5f4db3929853..7ea98965de4f 100644 --- a/docs/content/getting-started/container-images.md +++ b/docs/content/getting-started/container-images.md @@ -50,16 +50,6 @@ Standard container images do not have pre-installed models. Use these if you wan {{% /tab %}} -{{% tab title="GPU Images CUDA 11" %}} - -| Description | Quay | Docker Hub | -| --- | --- |-------------------------------------------------------------| -| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` | -| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` | -| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` | - -{{% /tab %}} - {{% tab title="GPU Images CUDA 12" %}} | Description | Quay | Docker Hub | @@ -169,11 +159,9 @@ services: image: localai/localai:latest-aio-cpu # For a specific version: # image: localai/localai:{{< version >}}-aio-cpu - # For Nvidia GPUs decomment one of the following (cuda11, cuda12, or cuda13): - # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11 + # For Nvidia GPUs uncomment one of the following (cuda12 or cuda13): # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12 # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13 - # image: localai/localai:latest-aio-gpu-nvidia-cuda-11 # image: localai/localai:latest-aio-gpu-nvidia-cuda-12 # image: localai/localai:latest-aio-gpu-nvidia-cuda-13 healthcheck: @@ -225,7 +213,6 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo | --- | --- |-----------------------------------------------| | Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` | | Versioned image (e.g. 
for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` | -| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` | | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` | | Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` | | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` | diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md index 1a3ea706c551..7cb354f98a8a 100644 --- a/docs/content/installation/docker.md +++ b/docs/content/installation/docker.md @@ -68,11 +68,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12 ``` -**NVIDIA CUDA 11:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11 -``` - **AMD GPU (ROCm):** ```bash docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas @@ -122,11 +117,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12 ``` -**NVIDIA CUDA 11:** -```bash -docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11 -``` - **AMD GPU (ROCm):** ```bash docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas diff --git a/docs/content/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md index b34b3d452711..97bc61313dc0 100644 --- a/docs/content/reference/compatibility-table.md +++ b/docs/content/reference/compatibility-table.md @@ -18,9 +18,9 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| -| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | +| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU | | [vLLM](https://github.com/vllm-project/vllm) | Various GPTs and quantization formats | yes | GPT | no | no | CUDA 12/13, ROCm, Intel | -| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 11/12/13, 
ROCm, Intel, CPU | +| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 12/13, ROCm, Intel, CPU | | [exllama2](https://github.com/turboderp-org/exllamav2) | GPTQ | yes | GPT only | no | no | CUDA 12/13 | | [MLX](https://github.com/ml-explore/mlx-lm) | Various LLMs | yes | GPT | no | no | Metal (Apple Silicon) | | [MLX-VLM](https://github.com/Blaizzy/mlx-vlm) | Vision-Language Models | yes | Multimodal GPT | no | no | Metal (Apple Silicon) | @@ -37,7 +37,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | CUDA, Metal, CPU | | [coqui](https://github.com/idiap/coqui-ai-TTS) | Coqui TTS | no | Audio generation and Voice cloning | no | no | CUDA 12/13, ROCm, Intel, CPU | | [kokoro](https://github.com/hexgrad/kokoro) | Kokoro TTS | no | Text-to-speech | no | no | CUDA 12/13, ROCm, Intel, CPU | -| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12/13, CPU | +| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 12/13, CPU | | [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU | | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU | | [neutts](https://github.com/neuphonic/neuttsair) | NeuTTSAir | no | Text-to-speech with voice cloning | no | no | CUDA 12/13, ROCm, CPU | @@ -49,7 +49,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [stablediffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | CUDA 12/13, Intel SYCL, Vulkan, CPU | -| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 11/12/13, ROCm, Intel, Metal, CPU | +| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... 
| no | Image/Video generation | no | no | CUDA 12/13, ROCm, Intel, Metal, CPU | | [transformers-musicgen](https://github.com/huggingface/transformers) | MusicGen | no | Audio generation | no | no | CUDA, CPU | ## Specialized AI Tasks @@ -57,14 +57,14 @@ LocalAI will attempt to automatically load models which are not explicitly confi | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration | |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------| | [rfdetr](https://github.com/roboflow/rf-detr) | RF-DETR | no | Object Detection | no | no | CUDA 12/13, Intel, CPU | -| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 11/12/13, ROCm, Intel, CPU | +| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 12/13, ROCm, Intel, CPU | | [local-store](https://github.com/mudler/LocalAI) | Vector database | no | Vector storage | yes | no | CPU | | [huggingface](https://huggingface.co/docs/hub/en/api) | HuggingFace API models | yes | Various AI tasks | yes | yes | API-based | ## Acceleration Support Summary ### GPU Acceleration -- **NVIDIA CUDA**: CUDA 11.7, CUDA 12.0, CUDA 13.0 support across most backends +- **NVIDIA CUDA**: CUDA 12.0, CUDA 13.0 support across most backends - **AMD ROCm**: HIP-based acceleration for AMD GPUs - **Intel oneAPI**: SYCL-based acceleration for Intel GPUs (F16/F32 precision) - **Vulkan**: Cross-platform GPU acceleration From 6d04e235b4fb58850b8a375952fa74beabed5f2c Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Tue, 6 Jan 2026 12:55:11 +0000 Subject: [PATCH 25/25] chore(build): Add Ubuntu codename as an argument Signed-off-by: Richard Palethorpe --- .github/workflows/image.yml | 11 +++++++++++ .github/workflows/image_build.yml | 7 +++++++ Dockerfile | 3 ++- Makefile | 8 ++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7f2a316634c2..3550113f531b 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -28,6 +28,7 @@ jobs: aio: ${{ matrix.aio }} makeflags: ${{ matrix.makeflags }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -46,6 +47,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" ubuntu-version: '2404' + ubuntu-codename: 'noble' core-image-build: uses: ./.github/workflows/image_build.yml @@ -63,6 +65,7 @@ jobs: makeflags: ${{ matrix.makeflags }} skip-drivers: ${{ matrix.skip-drivers }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -82,6 +85,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "9" @@ -94,6 +98,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ 
-106,6 +111,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-13" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'vulkan' platforms: 'linux/amd64' tag-latest: 'auto' @@ -116,6 +122,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" ubuntu-version: '2404' + ubuntu-codename: 'noble' - build-type: 'intel' platforms: 'linux/amd64' tag-latest: 'auto' @@ -126,6 +133,7 @@ jobs: makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-intel" ubuntu-version: '2404' + ubuntu-codename: 'noble' gh-runner: uses: ./.github/workflows/image_build.yml @@ -143,6 +151,7 @@ jobs: makeflags: ${{ matrix.makeflags }} skip-drivers: ${{ matrix.skip-drivers }} ubuntu-version: ${{ matrix.ubuntu-version }} + ubuntu-codename: ${{ matrix.ubuntu-codename }} secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }} @@ -162,6 +171,7 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'true' ubuntu-version: "2404" + ubuntu-codename: 'noble' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -173,3 +183,4 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2404' + ubuntu-codename: 'noble' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 39cfa1401052..d72da8af03a4 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -61,6 +61,11 @@ on: required: false default: '2204' type: string + ubuntu-codename: + description: 'Ubuntu codename' + required: false + default: 'noble' + type: string secrets: dockerUsername: required: true @@ -244,6 +249,7 @@ jobs: MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} context: . file: ./Dockerfile cache-from: type=gha @@ -272,6 +278,7 @@ jobs: MAKEFLAGS=${{ inputs.makeflags }} SKIP_DRIVERS=${{ inputs.skip-drivers }} UBUNTU_VERSION=${{ inputs.ubuntu-version }} + UBUNTU_CODENAME=${{ inputs.ubuntu-codename }} context: . 
file: ./Dockerfile cache-from: type=gha diff --git a/Dockerfile b/Dockerfile index fb35a9dfb0b7..4f1c125548f0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} +ARG UBUNTU_CODENAME=noble FROM ${BASE_IMAGE} AS requirements @@ -224,7 +225,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${UBUNTU_CODENAME}/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Makefile b/Makefile index 040741f4c1f8..9eb59fefb8ba 100644 --- a/Makefile +++ b/Makefile @@ -10,6 +10,7 @@ LAUNCHER_BINARY_NAME=local-ai-launcher CUDA_MAJOR_VERSION?=13 CUDA_MINOR_VERSION?=0 UBUNTU_VERSION?=2204 +UBUNTU_CODENAME?=noble GORELEASER?= @@ -169,6 +170,7 @@ docker-build-aio: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ --build-arg GO_TAGS="$(GO_TAGS)" \ -t local-ai:tests -f Dockerfile . BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio @@ -199,6 +201,7 @@ prepare-e2e: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ -t localai-tests . @@ -335,6 +338,7 @@ docker: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE) . docker-cuda12: @@ -347,6 +351,7 @@ docker-cuda12: --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE)-cuda-12 . docker-aio: @@ -357,6 +362,7 @@ docker-aio: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . docker-aio-all: @@ -373,6 +379,7 @@ docker-image-intel: --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ -t $(DOCKER_IMAGE) . ######################################################## @@ -458,6 +465,7 @@ define docker-build-backend --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \ --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \ --build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \ + --build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \ $(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \ -t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3) endef
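
For reference, the dependency resolution that the requirements-cublas11.txt removals in PATCH 24/25 rely on can be condensed into a short sketch. It follows the lookup order documented in the backend/python/common/libbackend.sh comments above; the loop body and variable defaults here are illustrative assumptions, not the actual implementation:

```bash
# Sketch (assumed shape): how a backend's Python requirements are resolved.
# Only cublas12/cublas13 profile files remain after this series, so a host
# that previously matched cublas11 falls through to the base requirements.
BUILD_TYPE=${BUILD_TYPE:-cpu}                 # e.g. cublas, hipblas, intel
BUILD_PROFILE=${BUILD_PROFILE:-$BUILD_TYPE}   # e.g. cublas12, cublas13

for candidate in requirements.txt \
                 "requirements-${BUILD_TYPE}.txt" \
                 "requirements-${BUILD_PROFILE}.txt"; do
    if [ -f "$candidate" ]; then
        pip install -r "$candidate"
    fi
done
```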
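
The UBUNTU_CODENAME argument introduced in PATCH 25/25 flows from the workflow matrix through image_build.yml into the Dockerfile and Makefile. A minimal usage sketch follows; the local-ai:noble tag is illustrative, and the version/codename pair should describe the same Ubuntu release, since the codename is substituted into the Intel graphics apt source:

```bash
# Via the Makefile target touched above (DOCKER_IMAGE is the existing
# Makefile variable used as the image tag):
make docker UBUNTU_VERSION=2404 UBUNTU_CODENAME=noble DOCKER_IMAGE=local-ai:noble

# Direct equivalent; inside the Dockerfile the codename expands into
#   deb [...] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified
docker build \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --build-arg UBUNTU_VERSION=2404 \
  --build-arg UBUNTU_CODENAME=noble \
  -t local-ai:noble .
```

Since the Makefile defaults are UBUNTU_VERSION?=2204 and UBUNTU_CODENAME?=noble, passing a matching pair explicitly keeps the Intel apt source pointed at the release the base image actually ships.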