diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 66b1c683b88f..8b6c38b9c7ca 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -39,72 +39,19 @@ jobs:
         #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
       matrix:
         include:
-          # CUDA 11 builds
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "rerankers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "transformers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
           - build-type: 'l4t'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-diffusers'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -112,12 +59,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -125,92 +72,26 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          # CUDA 11 additional backends
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "faster-whisper"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "coqui"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-bark'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "bark"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "chatterbox"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # CUDA 12 builds
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -218,194 +99,194 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
            platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-neutts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # cuda 13
           - build-type: 'cublas'
             cuda-major-version: "13"
@@ -414,12 +295,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -427,12 +308,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -440,12 +321,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -466,12 +347,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -479,12 +360,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -497,7 +378,7 @@ jobs:
             ubuntu-version: '2404'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
+            context: "./"
           - build-type: 'l4t'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -510,7 +391,7 @@ jobs:
             ubuntu-version: '2404'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
+            context: "./"
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -518,12 +399,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -531,12 +412,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -544,12 +425,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -557,12 +438,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -570,12 +451,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -596,12 +477,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -622,12 +503,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # hipblas builds
           - build-type: 'hipblas'
             cuda-major-version: ""
@@ -636,12 +517,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -649,12 +530,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -662,12 +543,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -675,12 +556,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-transformers'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -688,12 +569,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-diffusers'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # ROCm additional backends
           - build-type: 'hipblas'
             cuda-major-version: ""
@@ -702,12 +583,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-kokoro'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -715,12 +596,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-vibevoice'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -728,12 +609,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -741,12 +622,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -754,12 +635,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-bark'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # sycl builds
           - build-type: 'intel'
             cuda-major-version: ""
@@ -768,12 +649,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -781,12 +662,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -794,12 +675,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -807,12 +688,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -820,12 +701,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -833,12 +714,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -846,12 +727,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-vibevoice'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -859,12 +740,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-kokoro'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # SYCL additional backends
           - build-type: 'intel'
             cuda-major-version: ""
@@ -873,12 +754,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -886,12 +767,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -899,12 +780,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-vibevoice'
             runs-on: 'arc-runner-set'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -912,12 +793,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -925,12 +806,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # piper
           - build-type: ''
             cuda-major-version: ""
@@ -939,12 +820,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-piper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "piper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # bark-cpp
           - build-type: ''
             cuda-major-version: ""
@@ -953,12 +834,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-bark-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark-cpp"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -966,25 +847,25 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -992,12 +873,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # Stablediffusion-ggml
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1006,25 +887,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "stablediffusion-ggml"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1032,12 +900,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1045,12 +913,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1058,25 +926,25 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # whisper
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1085,25 +953,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1111,12 +966,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1124,12 +979,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1137,38 +992,38 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-whisper'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-whisper'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             runs-on: 'ubuntu-latest'
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           #silero-vad
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1177,12 +1032,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-silero-vad'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "silero-vad"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # local-store
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1191,12 +1046,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-local-store'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "local-store"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # huggingface
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1205,12 +1060,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-huggingface'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "huggingface"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # rfdetr
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1219,25 +1074,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "rfdetr"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-rfdetr'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1245,12 +1087,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1258,12 +1100,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-rfdetr'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # exllama2
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1272,25 +1114,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "exllama2"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-exllama2'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1298,12 +1127,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1311,12 +1140,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-gpu-hipblas-exllama2'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             runs-on: 'ubuntu-latest'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1324,12 +1153,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-chatterbox'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # runs out of space on the runner
           # - build-type: 'hipblas'
           #   cuda-major-version: ""
@@ -1337,12 +1166,12 @@ jobs:
           #   platforms: 'linux/amd64'
           #   tag-latest: 'auto'
           #   tag-suffix: '-gpu-hipblas-rfdetr'
-          #   base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+          #   base-image: "rocm/dev-ubuntu-24.04:6.4.4"
           #   runs-on: 'ubuntu-latest'
           #   skip-drivers: 'false'
           #   backend: "rfdetr"
           #   dockerfile: "./backend/Dockerfile.python"
-          #   context: "./backend"
+          #   context: "./"
           # kitten-tts
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1351,12 +1180,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-kitten-tts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kitten-tts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # neutts
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1365,12 +1194,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-neutts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1378,12 +1207,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-neutts'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1391,12 +1220,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-neutts'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1404,12 +1233,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
 
   backend-jobs-darwin:
     uses: ./.github/workflows/backend_build_darwin.yml
     strategy:
diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml
index feadf0948bdc..72a2b306741d 100644
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - grpc-base-image: ubuntu:22.04
+          - grpc-base-image: ubuntu:24.04
             runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64,linux/arm64'
     runs-on: ${{matrix.runs-on}}
diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml
index 5c0160addb38..c417ceeb8dbd 100644
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04
            runs-on: 'arc-runner-set'
            platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}
@@ -53,7 +53,7 @@ jobs:
            BASE_IMAGE=${{ matrix.base-image }}
          context: .
          file: ./Dockerfile
-          tags: quay.io/go-skynet/intel-oneapi-base:latest
+          tags: quay.io/go-skynet/intel-oneapi-base:24.04
          push: true
          target: intel
          platforms: ${{ matrix.platforms }}
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 84ffa5a1320c..9b7b9ec97b0b 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -37,14 +37,14 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-gpu-nvidia-cuda-12'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -54,33 +54,33 @@ jobs:
           tag-latest: 'false'
           tag-suffix: '-gpu-nvidia-cuda-13'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas'
-          base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'sycl'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: 'sycl'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'vulkan'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-vulkan-core'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=4 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -91,4 +91,4 @@ jobs:
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2404'
\ No newline at end of file
+          ubuntu-version: '2404'
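A convention running through all of the matrices above is that `base-image`, `ubuntu-version`, and (for cublas entries) the CUDA minor version are updated together. A small consistency check along the lines of the sketch below can flag entries where one field was left behind; it assumes mikefarah's yq v4 and jq are on PATH, and that the matrix lives under a job key named `backend-jobs` (the real key sits outside the hunks shown here, so adjust it to match).

```bash
#!/usr/bin/env bash
# Sketch: print the tag-suffix of matrix entries that still pair an
# Ubuntu 22.04 base image with ubuntu-version '2404' (or vice versa).
# Hypothetical job key "backend-jobs"; replace with the real one.
yq -o=json '.jobs["backend-jobs"].strategy.matrix.include[]' .github/workflows/backend.yml \
  | jq -r 'select((.["base-image"] // "" | contains("22.04")) != (.["ubuntu-version"] == "2204"))
           | "mismatch: \(.["tag-suffix"])"'
```

Pointed at image-pr.yml instead, a check like this would flag the CUDA 13 amd64 entry above, which keeps `base-image: "ubuntu:22.04"` as context while moving to `ubuntu-version: '2404'`.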
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 7389760912c5..3550113f531b 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -28,6 +28,7 @@ jobs:
       aio: ${{ matrix.aio }}
       makeflags: ${{ matrix.makeflags }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -40,12 +41,13 @@ jobs:
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-hipblas'
-          base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
           aio: "-aio-gpu-hipblas"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
 
   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -63,6 +65,7 @@ jobs:
       makeflags: ${{ matrix.makeflags }}
       skip-drivers: ${{ matrix.skip-drivers }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -76,36 +79,26 @@ jobs:
           platforms: 'linux/amd64,linux/arm64'
           tag-latest: 'auto'
           tag-suffix: ''
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           aio: "-aio-cpu"
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2204'
-        - build-type: 'cublas'
-          cuda-major-version: "11"
-          cuda-minor-version: "7"
-          platforms: 'linux/amd64'
-          tag-latest: 'auto'
-          tag-suffix: '-gpu-nvidia-cuda-11'
-          runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
-          skip-drivers: 'false'
-          aio: "-aio-gpu-nvidia-cuda-11"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
        - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-nvidia-cuda-12"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -117,27 +110,30 @@ jobs:
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-nvidia-cuda-13"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'vulkan'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-vulkan'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-vulkan"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'intel'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: '-gpu-intel'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
           aio: "-aio-gpu-intel"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
 
   gh-runner:
     uses: ./.github/workflows/image_build.yml
@@ -155,6 +151,7 @@ jobs:
       makeflags: ${{ matrix.makeflags }}
       skip-drivers: ${{ matrix.skip-drivers }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -165,15 +162,16 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/arm64'
           tag-latest: 'auto'
           tag-suffix: '-nvidia-l4t-arm64'
-          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'true'
-          ubuntu-version: "2204"
+          ubuntu-version: "2404"
+          ubuntu-codename: 'noble'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -184,4 +182,5 @@ jobs:
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2404'
\ No newline at end of file
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 31a1f2310ea4..d72da8af03a4 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "4"
+        default: "9"
         type: string
       platforms:
         description: 'Platforms'
@@ -61,6 +61,11 @@ on:
         required: false
         default: '2204'
         type: string
+      ubuntu-codename:
+        description: 'Ubuntu codename'
+        required: false
+        default: 'noble'
+        type: string
   secrets:
     dockerUsername:
       required: true
@@ -244,6 +249,7 @@ jobs:
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
+            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
@@ -272,6 +278,7 @@ jobs:
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
+            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
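`image_build.yml` now threads the new `ubuntu-codename` input through to the image build as a `UBUNTU_CODENAME` build argument, next to the existing `UBUNTU_VERSION`. A rough local equivalent of what the workflow invokes, using the defaults declared above (the output tag is arbitrary and only illustrative):

```bash
# Sketch: pass the same build-args the workflow now supplies.
docker build \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --build-arg CUDA_MAJOR_VERSION=12 \
  --build-arg CUDA_MINOR_VERSION=9 \
  --build-arg UBUNTU_VERSION=2404 \
  --build-arg UBUNTU_CODENAME=noble \
  -f Dockerfile -t local-ai:dev .
```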
diff --git a/.gitignore b/.gitignore
index caae10a218a1..2ee2ab8588b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ go-bert
 # LocalAI build binary
 LocalAI
 /local-ai
+/local-ai-launcher
 # prevent above rules from omitting the helm chart
 !charts/*
 # prevent above rules from omitting the api/localai folder
diff --git a/Dockerfile b/Dockerfile
index 28147e75b856..4f1c125548f0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
+ARG UBUNTU_CODENAME=noble
 
 FROM ${BASE_IMAGE} AS requirements
 
@@ -9,7 +10,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     ca-certificates curl wget espeak-ng libgomp1 \
-    ffmpeg && \
+    ffmpeg libopenblas0 libopenblas-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -23,7 +24,7 @@
 ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}
-ARG UBUNTU_VERSION=2204
+ARG UBUNTU_VERSION=2404
 
 RUN mkdir -p /run/localai
 RUN echo "default" > /run/localai/capability
@@ -34,11 +35,30 @@ RUN <<EOT bash
 > /run/localai/capability
@@ -71,7 +91,7 @@ RUN <<EOT bash
-RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
+RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${UBUNTU_CODENAME}/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     intel-oneapi-runtime-libs && \
diff --git a/Dockerfile.aio b/Dockerfile.aio
index 81063bb4dbeb..ccc2fc94b9ed 100644
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE}
diff --git a/Makefile b/Makefile
index 6df349eb66d7..9eb59fefb8ba 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,6 @@
+# Disable parallel execution for backend builds
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm
+
 GOCMD=go
 GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
@@ -7,10 +10,13 @@ LAUNCHER_BINARY_NAME=local-ai-launcher
 CUDA_MAJOR_VERSION?=13
 CUDA_MINOR_VERSION?=0
 UBUNTU_VERSION?=2204
+UBUNTU_CODENAME?=noble
 GORELEASER?=
 
 export BUILD_TYPE?=
+export CUDA_MAJOR_VERSION?=12
+export CUDA_MINOR_VERSION?=9
 GO_TAGS?=
 BUILD_ID?=
@@ -164,6 +170,7 @@ docker-build-aio:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		-t local-ai:tests -f Dockerfile .
	BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
@@ -194,6 +201,7 @@ prepare-e2e:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
		-t localai-tests .
@@ -318,7 +326,7 @@ test-extra: prepare-test-extra
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:22.04
+BASE_IMAGE?=ubuntu:24.04
 
 docker:
	docker build \
@@ -330,19 +338,21 @@ docker:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_IMAGE) .
 
-docker-cuda11:
+docker-cuda12:
	docker build \
-		--build-arg CUDA_MAJOR_VERSION=11 \
-		--build-arg CUDA_MINOR_VERSION=8 \
+		--build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
+		--build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		-t $(DOCKER_IMAGE)-cuda-11 .
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
+		-t $(DOCKER_IMAGE)-cuda-12 .
 
 docker-aio:
	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
	docker build \
@@ -352,6 +362,7 @@ docker-aio:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
 
 docker-aio-all:
@@ -360,7 +371,7 @@ docker-aio-all:
 
 docker-image-intel:
	docker build \
-		--build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -368,6 +379,7 @@ docker-image-intel:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_IMAGE) .
 ########################################################
@@ -453,6 +465,7 @@ define docker-build-backend
 	--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
 	--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
 	--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+	--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
 	$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
 	-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
 endef
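With the `docker-cuda11` target renamed to `docker-cuda12`, CUDA image builds now pick up the exported defaults (`CUDA_MAJOR_VERSION=12`, `CUDA_MINOR_VERSION=9`) and can be overridden per invocation. A sketch of the intended usage under this Makefile (`BUILD_TYPE=cublas` is an assumption about a typical CUDA build):

```bash
# Build the CUDA 12 image with the Makefile defaults...
make BUILD_TYPE=cublas docker-cuda12

# ...or pin a different toolkit at invocation time.
make BUILD_TYPE=cublas CUDA_MAJOR_VERSION=13 CUDA_MINOR_VERSION=0 docker-cuda12
```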
diff --git a/README.md b/README.md
index b3993940ed1e..092432241837 100644
--- a/README.md
+++ b/README.md
@@ -152,9 +152,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp
 # CUDA 12.0
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 
-# CUDA 11.7
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
-
 # NVIDIA Jetson (L4T) ARM64
 # CUDA 12 (for Nvidia AGX Orin and similar platforms)
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
@@ -193,9 +190,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 # NVIDIA CUDA 12 version
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
 
-# NVIDIA CUDA 11 version
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
-
 # Intel GPU version
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
@@ -279,9 +273,9 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 ### Text Generation & Language Models
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
-| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
+| **llama.cpp** | LLM inference in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
 | **vLLM** | Fast LLM inference with PagedAttention | CUDA 12/13, ROCm, Intel |
-| **transformers** | HuggingFace transformers framework | CUDA 11/12/13, ROCm, Intel, CPU |
+| **transformers** | HuggingFace transformers framework | CUDA 12/13, ROCm, Intel, CPU |
 | **exllama2** | GPTQ inference library | CUDA 12/13 |
 | **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) |
 | **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) |
@@ -295,7 +289,7 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU |
 | **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU |
 | **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU |
-| **chatterbox** | Production-grade TTS | CUDA 11/12/13, CPU |
+| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU |
 | **piper** | Fast neural TTS system | CPU |
 | **kitten-tts** | Kitten TTS models | CPU |
 | **silero-vad** | Voice Activity Detection | CPU |
@@ -306,13 +300,13 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
 | **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12/13, Intel SYCL, Vulkan, CPU |
-| **diffusers** | HuggingFace diffusion models | CUDA 11/12/13, ROCm, Intel, Metal, CPU |
+| **diffusers** | HuggingFace diffusion models | CUDA 12/13, ROCm, Intel, Metal, CPU |
 
 ### Specialized AI Tasks
 
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
 | **rfdetr** | Real-time object detection | CUDA 12/13, Intel, CPU |
-| **rerankers** | Document reranking API | CUDA 11/12/13, ROCm, Intel, CPU |
+| **rerankers** | Document reranking API | CUDA 12/13, ROCm, Intel, CPU |
 | **local-store** | Vector database | CPU |
 | **huggingface** | HuggingFace API integration | API-based |
@@ -320,7 +314,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | Acceleration Type | Supported Backends | Hardware Support |
 |-------------------|-------------------|------------------|
-| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware |
 | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
 | **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware |
 | **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice | AMD Graphics |
diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang
index 1db39c9e1d63..7843d2ba6b8d 100644
--- a/backend/Dockerfile.golang
+++ b/backend/Dockerfile.golang
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
@@ -12,8 +12,8 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 ENV DEBIAN_FRONTEND=noninteractive
 ARG TARGETARCH
 ARG TARGETVARIANT
-ARG GO_VERSION=1.22.6
-ARG UBUNTU_VERSION=2204
+ARG GO_VERSION=1.25.4
+ARG UBUNTU_VERSION=2404
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -40,11 +40,30 @@
RUN <=1.24.0,<1.26.0
-# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
-chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
-accelerate
\ No newline at end of file
diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt
index 6c21992a7585..ed30fb824107 100644
--- a/backend/python/chatterbox/requirements-hipblas.txt
+++ b/backend/python/chatterbox/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.6.0+rocm6.1
-torchaudio==2.6.0+rocm6.1
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.9.1+rocm6.4
+torchaudio==2.9.1+rocm6.4
 transformers
 numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh
index 9af6ca6736f5..eb55f43d9547 100644
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euo pipefail
-# 
+#
 # use the library by adding the following line to a script:
 # source $(dirname $0)/../common/libbackend.sh
 #
@@ -206,8 +206,8 @@ function init() {
 # getBuildProfile will inspect the system to determine which build profile is appropriate:
 # returns one of the following:
-# - cublas11
 # - cublas12
+# - cublas13
 # - hipblas
 # - intel
 function getBuildProfile() {
@@ -392,7 +392,7 @@ function runProtogen() {
 # - requirements-${BUILD_TYPE}.txt
 # - requirements-${BUILD_PROFILE}.txt
 #
-# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12
+# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-12 or cuda-13
 # it can also include some options that we do not have BUILD_TYPES for, ex: intel
 #
 # NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
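For context, a minimal sketch of the profile selection those comments describe; the function name and return values come from libbackend.sh above, but the detection details here are assumptions (the real implementation inspects the system):

```bash
# Illustrative only: map BUILD_TYPE plus the CUDA major version onto the
# BUILD_PROFILE names documented above (cublas12, cublas13, hipblas, intel).
function getBuildProfile() {
    case "${BUILD_TYPE:-}" in
        cublas)
            # BUILD_PROFILE is the more specific form of BUILD_TYPE,
            # e.g. cublas12 or cublas13 depending on the toolkit found.
            echo "cublas${CUDA_MAJOR_VERSION:-12}"
            ;;
        hipblas) echo "hipblas" ;;
        intel)   echo "intel" ;;
        *)       echo "${BUILD_TYPE:-}" ;;
    esac
}
```

The profile then selects which `requirements-${BUILD_PROFILE}.txt` file gets installed, which is why the per-profile files below exist.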
diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt
index 76018445f448..b733ec7b148b 100644
--- a/backend/python/common/template/requirements-hipblas.txt
+++ b/backend/python/common/template/requirements-hipblas.txt
@@ -1,2 +1,2 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 torch
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt
deleted file mode 100644
index 97e1ef0a4afe..000000000000
--- a/backend/python/coqui/requirements-cublas11.txt
+++ /dev/null
@@ -1,6 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-torchaudio==2.4.1+cu118
-transformers==4.48.3
-accelerate
-coqui-tts
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt
index 55cdcdddb845..8e7d034591e3 100644
--- a/backend/python/coqui/requirements-hipblas.txt
+++ b/backend/python/coqui/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-torchaudio==2.4.1+rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchaudio==2.8.0+rocm6.4
 transformers==4.48.3
 accelerate
 coqui-tts
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt
deleted file mode 100644
index 7b77f7f68693..000000000000
--- a/backend/python/diffusers/requirements-cublas11.txt
+++ /dev/null
@@ -1,12 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-git+https://github.com/huggingface/diffusers
-opencv-python
-transformers
-torchvision==0.22.1
-accelerate
-compel
-peft
-sentencepiece
-torch==2.7.1
-optimum-quanto
-ftfy
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt
index aeea375639a0..b1f8b3e048c5 100644
--- a/backend/python/diffusers/requirements-hipblas.txt
+++ b/backend/python/diffusers/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchvision==0.22.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchvision==0.23.0+rocm6.4
 git+https://github.com/huggingface/diffusers
 opencv-python
 transformers
diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt
deleted file mode 100644
index 2d1958c75153..000000000000
--- a/backend/python/exllama2/requirements-cublas11.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-transformers
-accelerate
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt
deleted file mode 100644
index b74532957a4a..000000000000
--- a/backend/python/faster-whisper/requirements-cublas11.txt
+++ /dev/null
@@ -1,9 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt
index 29413f0508b3..da9c9123c0d7 100644
--- a/backend/python/faster-whisper/requirements-hipblas.txt
+++ b/backend/python/faster-whisper/requirements-hipblas.txt
@@ -1,3 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 torch
 faster-whisper
\ No newline at end of file
diff --git a/backend/python/kokoro/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt
deleted file mode 100644
index 628933b5640a..000000000000
--- a/backend/python/kokoro/requirements-cublas11.txt
+++ /dev/null
@@ -1,7 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-torchaudio==2.7.1+cu118
-transformers
-accelerate
-kokoro
-soundfile
\ No newline at end of file
diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt
index 1226d917447b..74262df5c3ce 100644
--- a/backend/python/kokoro/requirements-hipblas.txt
+++ b/backend/python/kokoro/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchaudio==2.7.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchaudio==2.8.0+rocm6.4
 transformers
 accelerate
 kokoro
diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt
index 012d3c8bf6f5..72d11e059817 100644
--- a/backend/python/neutts/requirements-hipblas.txt
+++ b/backend/python/neutts/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.8.0+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
 transformers==4.56.1
 accelerate
 librosa==0.11.0
diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt
deleted file mode 100644
index fef296fe8bb3..000000000000
--- a/backend/python/rerankers/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-transformers
-accelerate
-torch==2.4.1+cu118
-rerankers[transformers]
\ No newline at end of file
diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt
index b1c8baeddfe9..7a72b3d0650f 100644
--- a/backend/python/rerankers/requirements-hipblas.txt
+++ b/backend/python/rerankers/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 transformers
 accelerate
-torch==2.4.1+rocm6.0
+torch==2.8.0+rocm6.4
 rerankers[transformers]
\ No newline at end of file
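As these requirement files illustrate, the ROCm wheels are resolved through PyTorch's per-release package index rather than PyPI. A hedged example of what pip effectively does with one of the files above (versions are the ones pinned in the rerankers hipblas file):

```bash
# torch resolves from the rocm6.4 index first; the remaining packages
# fall back to PyPI.
pip install \
  --extra-index-url https://download.pytorch.org/whl/rocm6.4 \
  "torch==2.8.0+rocm6.4" transformers accelerate "rerankers[transformers]"
```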
diff --git a/backend/python/rfdetr/requirements-cublas11.txt b/backend/python/rfdetr/requirements-cublas11.txt
deleted file mode 100644
index 14449b3d4b00..000000000000
--- a/backend/python/rfdetr/requirements-cublas11.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-rfdetr
-opencv-python
-accelerate
-inference
-peft
-optimum-quanto
\ No newline at end of file
diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt
index 536a31efb509..884cfba7be46 100644
--- a/backend/python/rfdetr/requirements-hipblas.txt
+++ b/backend/python/rfdetr/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchvision==0.22.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchvision==0.23.0+rocm6.4
 rfdetr
 opencv-python
 accelerate
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
deleted file mode 100644
index 8402f001e3cd..000000000000
--- a/backend/python/transformers/requirements-cublas11.txt
+++ /dev/null
@@ -1,10 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-llvmlite==0.43.0
-numba==0.60.0
-accelerate
-transformers
-bitsandbytes
-outetts
-sentence-transformers==5.2.0
-protobuf==6.33.2
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt
index 732a3adfcdc4..59f99e3643fa 100644
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
 accelerate
 transformers
 llvmlite==0.43.0
diff --git a/backend/python/vibevoice/requirements-cublas11.txt b/backend/python/vibevoice/requirements-cublas11.txt
deleted file mode 100644
index 547b198aa870..000000000000
--- a/backend/python/vibevoice/requirements-cublas11.txt
+++ /dev/null
@@ -1,22 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-git+https://github.com/huggingface/diffusers
-opencv-python
-transformers==4.51.3
-torchvision==0.22.1
-accelerate
-compel
-peft
-sentencepiece
-torch==2.7.1
-optimum-quanto
-ftfy
-llvmlite>=0.40.0
-numba>=0.57.0
-tqdm
-numpy
-scipy
-librosa
-ml-collections
-absl-py
-gradio
-av
\ No newline at end of file
diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh
index 364ff7a41b99..7dcd29db4a92 100755
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -28,7 +28,7 @@ fi
 # We don't embed this into the images as it is a large dependency and not always needed.
 # Besides, the speed inference are not actually usable in the current state for production use-cases.
-if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
+if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
     ensureVenv
     # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
     if [ ! -d vllm ]; then
diff --git a/backend/python/vllm/requirements-cublas11-after.txt b/backend/python/vllm/requirements-cublas11-after.txt
deleted file mode 100644
index 7bfe8efeb555..000000000000
--- a/backend/python/vllm/requirements-cublas11-after.txt
+++ /dev/null
@@ -1 +0,0 @@
-flash-attn
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt
deleted file mode 100644
index 95fdd7aea702..000000000000
--- a/backend/python/vllm/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-accelerate
-torch==2.7.0+cu118
-transformers
-bitsandbytes
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt
index 3a65e0d7c907..db732bc864ef 100644
--- a/backend/python/vllm/requirements-hipblas.txt
+++ b/backend/python/vllm/requirements-hipblas.txt
@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.3
+--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4
 accelerate
 torch
 transformers
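The `${FROM_SOURCE:-}` change in install.sh above matters because libbackend.sh (which these install scripts source) runs under `set -euo pipefail`: with `set -u`, expanding an unset variable aborts the script, while the `:-` form substitutes an empty string. A small standalone illustration, not LocalAI code:

```bash
#!/usr/bin/env bash
set -euo pipefail

# Without a default, this line aborts the script when FROM_SOURCE is unset:
#   [ "x${FROM_SOURCE}" == "xtrue" ]   # -> "FROM_SOURCE: unbound variable"

# The ${VAR:-} form expands to "" instead, so the test degrades gracefully:
if [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
    echo "building vLLM from source"
else
    echo "using prebuilt vLLM"
fi
```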
diff --git a/docker-compose.yaml b/docker-compose.yaml
index b9880352ad8f..765a3fb63b2e 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -11,7 +11,7 @@ services:
       dockerfile: Dockerfile
       args:
         - IMAGE_TYPE=core
-        - BASE_IMAGE=ubuntu:22.04
+        - BASE_IMAGE=ubuntu:24.04
     ports:
       - 8080:8080
     env_file:
diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md
index 5f4db3929853..7ea98965de4f 100644
--- a/docs/content/getting-started/container-images.md
+++ b/docs/content/getting-started/container-images.md
@@ -50,16 +50,6 @@ Standard container images do not have pre-installed models. Use these if you wan
 
 {{% /tab %}}
 
-{{% tab title="GPU Images CUDA 11" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` |
-
-{{% /tab %}}
-
 {{% tab title="GPU Images CUDA 12" %}}
 
 | Description | Quay | Docker Hub |
@@ -169,11 +159,9 @@ services:
     image: localai/localai:latest-aio-cpu
     # For a specific version:
     # image: localai/localai:{{< version >}}-aio-cpu
-    # For Nvidia GPUs decomment one of the following (cuda11, cuda12, or cuda13):
-    # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11
+    # For Nvidia GPUs decomment one of the following (cuda12 or cuda13):
     # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12
     # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13
-    # image: localai/localai:latest-aio-gpu-nvidia-cuda-11
     # image: localai/localai:latest-aio-gpu-nvidia-cuda-12
     # image: localai/localai:latest-aio-gpu-nvidia-cuda-13
     healthcheck:
@@ -225,7 +213,6 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo
 | --- | --- |-----------------------------------------------|
 | Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
 | Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
-| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
 | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
 | Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` |
 | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md
index 1a3ea706c551..7cb354f98a8a 100644
--- a/docs/content/installation/docker.md
+++ b/docs/content/installation/docker.md
@@ -68,11 +68,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```
 
-**NVIDIA CUDA 11:**
-```bash
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
-```
-
 **AMD GPU (ROCm):**
 ```bash
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
@@ -122,11 +117,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
 ```
 
-**NVIDIA CUDA 11:**
-```bash
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
-```
-
 **AMD GPU (ROCm):**
 ```bash
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
diff --git a/docs/content/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md
index b34b3d452711..97bc61313dc0 100644
--- a/docs/content/reference/compatibility-table.md
+++ b/docs/content/reference/compatibility-table.md
@@ -18,9 +18,9 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
-| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
+| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
 | [vLLM](https://github.com/vllm-project/vllm) | Various GPTs and quantization formats | yes | GPT | no | no | CUDA 12/13, ROCm, Intel |
-| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 11/12/13, ROCm, Intel, CPU |
+| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 12/13, ROCm, Intel, CPU |
 | [exllama2](https://github.com/turboderp-org/exllamav2) | GPTQ | yes | GPT only | no | no | CUDA 12/13 |
 | [MLX](https://github.com/ml-explore/mlx-lm) | Various LLMs | yes | GPT | no | no | Metal (Apple Silicon) |
 | [MLX-VLM](https://github.com/Blaizzy/mlx-vlm) | Vision-Language Models | yes | Multimodal GPT | no | no | Metal (Apple Silicon) |
@@ -37,7 +37,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | CUDA, Metal, CPU |
 | [coqui](https://github.com/idiap/coqui-ai-TTS) | Coqui TTS | no | Audio generation and Voice cloning | no | no | CUDA 12/13, ROCm, Intel, CPU |
 | [kokoro](https://github.com/hexgrad/kokoro) | Kokoro TTS | no | Text-to-speech | no | no | CUDA 12/13, ROCm, Intel, CPU |
-| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12/13, CPU |
+| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 12/13, CPU |
 | [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU |
 | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
 | [neutts](https://github.com/neuphonic/neuttsair) | NeuTTSAir | no | Text-to-speech with voice cloning | no | no | CUDA 12/13, ROCm, CPU |
@@ -49,7 +49,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
 | [stablediffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | CUDA 12/13, Intel SYCL, Vulkan, CPU |
-| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 11/12/13, ROCm, Intel, Metal, CPU |
+| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 12/13, ROCm, Intel, Metal, CPU |
 | [transformers-musicgen](https://github.com/huggingface/transformers) | MusicGen | no | Audio generation | no | no | CUDA, CPU |
 
 ## Specialized AI Tasks
 
@@ -57,14 +57,14 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
 | [rfdetr](https://github.com/roboflow/rf-detr) | RF-DETR | no | Object Detection | no | no | CUDA 12/13, Intel, CPU |
-| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 11/12/13, ROCm, Intel, CPU |
+| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 12/13, ROCm, Intel, CPU |
 | [local-store](https://github.com/mudler/LocalAI) | Vector database | no | Vector storage | yes | no | CPU |
 | [huggingface](https://huggingface.co/docs/hub/en/api) | HuggingFace API models | yes | Various AI tasks | yes | yes | API-based |
 
 ## Acceleration Support Summary
 
 ### GPU Acceleration
-- **NVIDIA CUDA**: CUDA 11.7, CUDA 12.0, CUDA 13.0 support across most backends
 - **AMD ROCm**: HIP-based acceleration for AMD GPUs
 - **Intel oneAPI**: SYCL-based acceleration for Intel GPUs (F16/F32 precision)
 - **Vulkan**: Cross-platform GPU acceleration