diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 66b1c683b88f..8b6c38b9c7ca 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -39,72 +39,19 @@ jobs:
         #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }}
       matrix:
         include:
-          # CUDA 11 builds
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-rerankers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "rerankers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "llama-cpp"
-            dockerfile: "./backend/Dockerfile.llama-cpp"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-transformers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "transformers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-diffusers'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "diffusers"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
           - build-type: 'l4t'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-diffusers'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -112,12 +59,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -125,92 +72,26 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          # CUDA 11 additional backends
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-kokoro'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "kokoro"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "faster-whisper"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-coqui'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "coqui"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-bark'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "bark"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-chatterbox'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "chatterbox"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # CUDA 12 builds
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -218,194 +99,194 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
            platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-12-neutts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # cuda 13
           - build-type: 'cublas'
             cuda-major-version: "13"
@@ -414,12 +295,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -427,12 +308,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -440,12 +321,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -466,12 +347,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -479,12 +360,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -497,7 +378,7 @@ jobs:
             ubuntu-version: '2404'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
+            context: "./"
           - build-type: 'l4t'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -510,7 +391,7 @@ jobs:
             ubuntu-version: '2404'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
+            context: "./"
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -518,12 +399,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -531,12 +412,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -544,12 +425,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -557,12 +438,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-chatterbox'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -570,12 +451,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -596,12 +477,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "13"
             cuda-minor-version: "0"
@@ -622,12 +503,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-nvidia-cuda-13-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # hipblas builds
           - build-type: 'hipblas'
             cuda-major-version: ""
@@ -636,12 +517,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -649,12 +530,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -662,12 +543,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -675,12 +556,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-transformers'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -688,12 +569,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-diffusers'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # ROCm additional backends
           - build-type: 'hipblas'
             cuda-major-version: ""
@@ -702,12 +583,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-kokoro'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -715,12 +596,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-vibevoice'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -728,12 +609,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -741,12 +622,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -754,12 +635,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-bark'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # sycl builds
           - build-type: 'intel'
             cuda-major-version: ""
@@ -768,12 +649,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-rerankers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "rerankers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -781,12 +662,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -794,12 +675,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -807,12 +688,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-vllm'
             runs-on: 'arc-runner-set'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "vllm"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -820,12 +701,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-transformers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "transformers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -833,12 +714,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-diffusers'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "diffusers"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -846,12 +727,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-vibevoice'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -859,12 +740,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-kokoro'
             runs-on: 'ubuntu-24.04-arm'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'true'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # SYCL additional backends
           - build-type: 'intel'
             cuda-major-version: ""
@@ -873,12 +754,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-kokoro'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "kokoro"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -886,12 +767,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-faster-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "faster-whisper"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -899,12 +780,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-vibevoice'
             runs-on: 'arc-runner-set'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -912,12 +793,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-coqui'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "coqui"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -925,12 +806,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-bark'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "bark"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # piper
           - build-type: ''
             cuda-major-version: ""
@@ -939,12 +820,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-piper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "piper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # bark-cpp
           - build-type: ''
             cuda-major-version: ""
@@ -953,12 +834,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-bark-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "bark-cpp"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -966,25 +847,25 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-llama-cpp'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -992,12 +873,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-llama-cpp'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "llama-cpp"
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # Stablediffusion-ggml
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1006,25 +887,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "stablediffusion-ggml"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1032,12 +900,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1045,12 +913,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1058,25 +926,25 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-stablediffusion-ggml'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-stablediffusion-ggml'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "stablediffusion-ggml"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # whisper
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1085,25 +953,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "whisper"
-            dockerfile: "./backend/Dockerfile.golang"
-            context: "./"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-whisper'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f32'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1111,12 +966,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f32-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'sycl_f16'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1124,12 +979,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-sycl-f16-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'vulkan'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1137,38 +992,38 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-vulkan-whisper'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'cublas'
             cuda-major-version: "12"
-            cuda-minor-version: "0"
+            cuda-minor-version: "9"
             platforms: 'linux/arm64'
-            skip-drivers: 'true'
+            skip-drivers: 'false'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-whisper'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
             platforms: 'linux/amd64'
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-whisper'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             runs-on: 'ubuntu-latest'
             skip-drivers: 'false'
             backend: "whisper"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           #silero-vad
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1177,12 +1032,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-silero-vad'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "silero-vad"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # local-store
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1191,12 +1046,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-local-store'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "local-store"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # huggingface
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1205,12 +1060,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-huggingface'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "huggingface"
             dockerfile: "./backend/Dockerfile.golang"
             context: "./"
-            ubuntu-version: '2204'
+            ubuntu-version: '2404'
           # rfdetr
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1219,25 +1074,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "rfdetr"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-rfdetr'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1245,12 +1087,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-rfdetr'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1258,12 +1100,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-rfdetr'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "rfdetr"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # exllama2
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1272,25 +1114,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
-            skip-drivers: 'false'
-            backend: "exllama2"
-            dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
-          - build-type: 'cublas'
-            cuda-major-version: "11"
-            cuda-minor-version: "7"
-            platforms: 'linux/amd64'
-            tag-latest: 'auto'
-            tag-suffix: '-gpu-nvidia-cuda-11-exllama2'
-            runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'intel'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1298,12 +1127,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-intel-exllama2'
             runs-on: 'ubuntu-latest'
-            base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
+            base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
             skip-drivers: 'false'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1311,12 +1140,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-gpu-hipblas-exllama2'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             runs-on: 'ubuntu-latest'
             backend: "exllama2"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1324,12 +1153,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-chatterbox'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "chatterbox"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # runs out of space on the runner
           # - build-type: 'hipblas'
           #   cuda-major-version: ""
@@ -1337,12 +1166,12 @@ jobs:
           #   platforms: 'linux/amd64'
           #   tag-latest: 'auto'
           #   tag-suffix: '-gpu-hipblas-rfdetr'
-          #   base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+          #   base-image: "rocm/dev-ubuntu-24.04:6.4.4"
           #   runs-on: 'ubuntu-latest'
           #   skip-drivers: 'false'
           #   backend: "rfdetr"
           #   dockerfile: "./backend/Dockerfile.python"
-          #   context: "./backend"
+          #   context: "./"
           # kitten-tts
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1351,12 +1180,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-kitten-tts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "kitten-tts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           # neutts
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1365,12 +1194,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-neutts'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'hipblas'
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1378,12 +1207,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-gpu-rocm-hipblas-neutts'
             runs-on: 'arc-runner-set'
-            base-image: "rocm/dev-ubuntu-22.04:6.4.3"
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
             skip-drivers: 'false'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: 'l4t'
             cuda-major-version: "12"
             cuda-minor-version: "0"
@@ -1391,12 +1220,12 @@ jobs:
             skip-drivers: 'true'
             tag-latest: 'auto'
             tag-suffix: '-nvidia-l4t-arm64-neutts'
-            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            base-image: "ubuntu:24.04"
             runs-on: 'ubuntu-24.04-arm'
             backend: "neutts"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
           - build-type: ''
             cuda-major-version: ""
             cuda-minor-version: ""
@@ -1404,12 +1233,12 @@ jobs:
             tag-latest: 'auto'
             tag-suffix: '-cpu-vibevoice'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
+            base-image: "ubuntu:24.04"
             skip-drivers: 'false'
             backend: "vibevoice"
             dockerfile: "./backend/Dockerfile.python"
-            context: "./backend"
-            ubuntu-version: '2204'
+            context: "./"
+            ubuntu-version: '2404'
 
   backend-jobs-darwin:
     uses: ./.github/workflows/backend_build_darwin.yml
     strategy:
diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml
index feadf0948bdc..72a2b306741d 100644
--- a/.github/workflows/generate_grpc_cache.yaml
+++ b/.github/workflows/generate_grpc_cache.yaml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - grpc-base-image: ubuntu:22.04
+          - grpc-base-image: ubuntu:24.04
             runs-on: 'ubuntu-latest'
            platforms: 'linux/amd64,linux/arm64'
     runs-on: ${{matrix.runs-on}}
diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml
index 5c0160addb38..c417ceeb8dbd 100644
--- a/.github/workflows/generate_intel_image.yaml
+++ b/.github/workflows/generate_intel_image.yaml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04
+          - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04
            runs-on: 'arc-runner-set'
            platforms: 'linux/amd64'
     runs-on: ${{matrix.runs-on}}
@@ -53,7 +53,7 @@ jobs:
            BASE_IMAGE=${{ matrix.base-image }}
          context: .
          file: ./Dockerfile
-          tags: quay.io/go-skynet/intel-oneapi-base:latest
+          tags: quay.io/go-skynet/intel-oneapi-base:24.04
          push: true
          target: intel
          platforms: ${{ matrix.platforms }}
diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 84ffa5a1320c..9b7b9ec97b0b 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -37,14 +37,14 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-gpu-nvidia-cuda-12'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -54,33 +54,33 @@ jobs:
           tag-latest: 'false'
           tag-suffix: '-gpu-nvidia-cuda-13'
           runs-on: 'ubuntu-latest'
           base-image: "ubuntu:22.04"
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'hipblas'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-hipblas'
-          base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'sycl'
           platforms: 'linux/amd64'
           tag-latest: 'false'
-          base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: 'sycl'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'vulkan'
           platforms: 'linux/amd64'
           tag-latest: 'false'
           tag-suffix: '-vulkan-core'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           makeflags: "--jobs=4 --output-sync=target"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -91,4 +91,4 @@ jobs:
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2404'
\ No newline at end of file
+          ubuntu-version: '2404'
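A convention running through all of the matrices above is that `base-image`, `ubuntu-version`, and (for cublas entries) the CUDA minor version are updated together. A small consistency check along the lines of the sketch below can flag entries where one field was left behind; it assumes mikefarah's yq v4 and jq are on PATH, and that the matrix lives under a job key named `backend-jobs` (the real key sits outside the hunks shown here, so adjust it to match).

```bash
#!/usr/bin/env bash
# Sketch: print the tag-suffix of matrix entries that still pair an
# Ubuntu 22.04 base image with ubuntu-version '2404' (or vice versa).
# Hypothetical job key "backend-jobs"; replace with the real one.
yq -o=json '.jobs["backend-jobs"].strategy.matrix.include[]' .github/workflows/backend.yml \
  | jq -r 'select((.["base-image"] // "" | contains("22.04")) != (.["ubuntu-version"] == "2204"))
           | "mismatch: \(.["tag-suffix"])"'
```

Pointed at image-pr.yml instead, a check like this would flag the CUDA 13 amd64 entry above, which keeps `base-image: "ubuntu:22.04"` as context while moving to `ubuntu-version: '2404'`.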
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 7389760912c5..3550113f531b 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -28,6 +28,7 @@ jobs:
       aio: ${{ matrix.aio }}
       makeflags: ${{ matrix.makeflags }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -40,12 +41,13 @@ jobs:
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-hipblas'
-          base-image: "rocm/dev-ubuntu-22.04:6.4.3"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+          grpc-base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
           aio: "-aio-gpu-hipblas"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
 
   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -63,6 +65,7 @@ jobs:
       makeflags: ${{ matrix.makeflags }}
       skip-drivers: ${{ matrix.skip-drivers }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -76,36 +79,26 @@ jobs:
           platforms: 'linux/amd64,linux/arm64'
           tag-latest: 'auto'
           tag-suffix: ''
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-latest'
           aio: "-aio-cpu"
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2204'
-        - build-type: 'cublas'
-          cuda-major-version: "11"
-          cuda-minor-version: "7"
-          platforms: 'linux/amd64'
-          tag-latest: 'auto'
-          tag-suffix: '-gpu-nvidia-cuda-11'
-          runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
-          makeflags: "--jobs=4 --output-sync=target"
-          skip-drivers: 'false'
-          aio: "-aio-gpu-nvidia-cuda-11"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
        - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-nvidia-cuda-12'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-nvidia-cuda-12"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -117,27 +110,30 @@ jobs:
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-nvidia-cuda-13"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'vulkan'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
           tag-suffix: '-gpu-vulkan'
           runs-on: 'ubuntu-latest'
-          base-image: "ubuntu:22.04"
+          base-image: "ubuntu:24.04"
           skip-drivers: 'false'
           makeflags: "--jobs=4 --output-sync=target"
           aio: "-aio-gpu-vulkan"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
         - build-type: 'intel'
           platforms: 'linux/amd64'
           tag-latest: 'auto'
-          base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
-          grpc-base-image: "ubuntu:22.04"
+          base-image: "intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04"
+          grpc-base-image: "ubuntu:24.04"
           tag-suffix: '-gpu-intel'
           runs-on: 'ubuntu-latest'
           makeflags: "--jobs=3 --output-sync=target"
           aio: "-aio-gpu-intel"
-          ubuntu-version: '2204'
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
 
   gh-runner:
     uses: ./.github/workflows/image_build.yml
@@ -155,6 +151,7 @@ jobs:
       makeflags: ${{ matrix.makeflags }}
       skip-drivers: ${{ matrix.skip-drivers }}
       ubuntu-version: ${{ matrix.ubuntu-version }}
+      ubuntu-codename: ${{ matrix.ubuntu-codename }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -165,15 +162,16 @@ jobs:
       include:
         - build-type: 'cublas'
           cuda-major-version: "12"
-          cuda-minor-version: "0"
+          cuda-minor-version: "9"
           platforms: 'linux/arm64'
           tag-latest: 'auto'
           tag-suffix: '-nvidia-l4t-arm64'
-          base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+          base-image: "ubuntu:24.04"
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'true'
-          ubuntu-version: "2204"
+          ubuntu-version: "2404"
+          ubuntu-codename: 'noble'
         - build-type: 'cublas'
           cuda-major-version: "13"
           cuda-minor-version: "0"
@@ -184,4 +182,5 @@ jobs:
           runs-on: 'ubuntu-24.04-arm'
           makeflags: "--jobs=4 --output-sync=target"
           skip-drivers: 'false'
-          ubuntu-version: '2404'
\ No newline at end of file
+          ubuntu-version: '2404'
+          ubuntu-codename: 'noble'
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 31a1f2310ea4..d72da8af03a4 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -23,7 +23,7 @@ on:
         type: string
       cuda-minor-version:
         description: 'CUDA minor version'
-        default: "4"
+        default: "9"
         type: string
       platforms:
         description: 'Platforms'
@@ -61,6 +61,11 @@ on:
         required: false
         default: '2204'
         type: string
+      ubuntu-codename:
+        description: 'Ubuntu codename'
+        required: false
+        default: 'noble'
+        type: string
   secrets:
     dockerUsername:
       required: true
@@ -244,6 +249,7 @@ jobs:
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
+            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
@@ -272,6 +278,7 @@ jobs:
            MAKEFLAGS=${{ inputs.makeflags }}
            SKIP_DRIVERS=${{ inputs.skip-drivers }}
            UBUNTU_VERSION=${{ inputs.ubuntu-version }}
+            UBUNTU_CODENAME=${{ inputs.ubuntu-codename }}
          context: .
          file: ./Dockerfile
          cache-from: type=gha
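`image_build.yml` now threads the new `ubuntu-codename` input through to the image build as a `UBUNTU_CODENAME` build argument, next to the existing `UBUNTU_VERSION`. A rough local equivalent of what the workflow invokes, using the defaults declared above (the output tag is arbitrary and only illustrative):

```bash
# Sketch: pass the same build-args the workflow now supplies.
docker build \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --build-arg CUDA_MAJOR_VERSION=12 \
  --build-arg CUDA_MINOR_VERSION=9 \
  --build-arg UBUNTU_VERSION=2404 \
  --build-arg UBUNTU_CODENAME=noble \
  -f Dockerfile -t local-ai:dev .
```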
diff --git a/.gitignore b/.gitignore
index caae10a218a1..2ee2ab8588b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ go-bert
 # LocalAI build binary
 LocalAI
 /local-ai
+/local-ai-launcher
 # prevent above rules from omitting the helm chart
 !charts/*
 # prevent above rules from omitting the api/localai folder
diff --git a/Dockerfile b/Dockerfile
index 28147e75b856..4f1c125548f0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}
+ARG UBUNTU_CODENAME=noble
 
 FROM ${BASE_IMAGE} AS requirements
 
@@ -9,7 +10,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     ca-certificates curl wget espeak-ng libgomp1 \
-    ffmpeg && \
+    ffmpeg libopenblas0 libopenblas-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
@@ -23,7 +24,7 @@
 ARG SKIP_DRIVERS=false
 ARG TARGETARCH
 ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}
-ARG UBUNTU_VERSION=2204
+ARG UBUNTU_VERSION=2404
 
 RUN mkdir -p /run/localai
 RUN echo "default" > /run/localai/capability
@@ -34,11 +35,30 @@ RUN <<EOT bash
 > /run/localai/capability
@@ -71,7 +91,7 @@ RUN <<EOT bash
-RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
+RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${UBUNTU_CODENAME}/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     intel-oneapi-runtime-libs && \
diff --git a/Dockerfile.aio b/Dockerfile.aio
index 81063bb4dbeb..ccc2fc94b9ed 100644
--- a/Dockerfile.aio
+++ b/Dockerfile.aio
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE}
diff --git a/Makefile b/Makefile
index 6df349eb66d7..9eb59fefb8ba 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,6 @@
+# Disable parallel execution for backend builds
+.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin backends/vllm
+
 GOCMD=go
 GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
@@ -7,10 +10,13 @@ LAUNCHER_BINARY_NAME=local-ai-launcher
 CUDA_MAJOR_VERSION?=13
 CUDA_MINOR_VERSION?=0
 UBUNTU_VERSION?=2204
+UBUNTU_CODENAME?=noble
 GORELEASER?=
 
 export BUILD_TYPE?=
+export CUDA_MAJOR_VERSION?=12
+export CUDA_MINOR_VERSION?=9
 GO_TAGS?=
 BUILD_ID?=
@@ -164,6 +170,7 @@ docker-build-aio:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		-t local-ai:tests -f Dockerfile .
	BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test $(MAKE) docker-aio
@@ -194,6 +201,7 @@ prepare-e2e:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
		-t localai-tests .
@@ -318,7 +326,7 @@ test-extra: prepare-test-extra
 DOCKER_IMAGE?=local-ai
 DOCKER_AIO_IMAGE?=local-ai-aio
 IMAGE_TYPE?=core
-BASE_IMAGE?=ubuntu:22.04
+BASE_IMAGE?=ubuntu:24.04
 
 docker:
	docker build \
@@ -330,19 +338,21 @@ docker:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_IMAGE) .
 
-docker-cuda11:
+docker-cuda12:
	docker build \
-		--build-arg CUDA_MAJOR_VERSION=11 \
-		--build-arg CUDA_MINOR_VERSION=8 \
+		--build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
+		--build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \
		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
-		-t $(DOCKER_IMAGE)-cuda-11 .
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
+		-t $(DOCKER_IMAGE)-cuda-12 .
 
 docker-aio:
	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
	docker build \
@@ -352,6 +362,7 @@ docker-aio:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
 
 docker-aio-all:
@@ -360,7 +371,7 @@ docker-aio-all:
 
 docker-image-intel:
	docker build \
-		--build-arg BASE_IMAGE=quay.io/go-skynet/intel-oneapi-base:latest \
+		--build-arg BASE_IMAGE=intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 \
		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
		--build-arg GO_TAGS="$(GO_TAGS)" \
		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
@@ -368,6 +379,7 @@ docker-image-intel:
		--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
		--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
		--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+		--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
		-t $(DOCKER_IMAGE) .
 ########################################################
@@ -453,6 +465,7 @@ define docker-build-backend
 	--build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) \
 	--build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) \
 	--build-arg UBUNTU_VERSION=$(UBUNTU_VERSION) \
+	--build-arg UBUNTU_CODENAME=$(UBUNTU_CODENAME) \
 	$(if $(filter true,$(5)),--build-arg BACKEND=$(1)) \
 	-t local-ai-backend:$(1) -f backend/Dockerfile.$(2) $(3)
 endef
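With the `docker-cuda11` target renamed to `docker-cuda12`, CUDA image builds now pick up the exported defaults (`CUDA_MAJOR_VERSION=12`, `CUDA_MINOR_VERSION=9`) and can be overridden per invocation. A sketch of the intended usage under this Makefile (`BUILD_TYPE=cublas` is an assumption about a typical CUDA build):

```bash
# Build the CUDA 12 image with the Makefile defaults...
make BUILD_TYPE=cublas docker-cuda12

# ...or pin a different toolkit at invocation time.
make BUILD_TYPE=cublas CUDA_MAJOR_VERSION=13 CUDA_MINOR_VERSION=0 docker-cuda12
```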
diff --git a/README.md b/README.md
index b3993940ed1e..092432241837 100644
--- a/README.md
+++ b/README.md
@@ -152,9 +152,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp
 # CUDA 12.0
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 
-# CUDA 11.7
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
-
 # NVIDIA Jetson (L4T) ARM64
 # CUDA 12 (for Nvidia AGX Orin and similar platforms)
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-nvidia-l4t-arm64
@@ -193,9 +190,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 # NVIDIA CUDA 12 version
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
 
-# NVIDIA CUDA 11 version
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
-
 # Intel GPU version
 docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-gpu-intel
@@ -279,9 +273,9 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 ### Text Generation & Language Models
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
-| **llama.cpp** | LLM inference in C/C++ | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
+| **llama.cpp** | LLM inference in C/C++ | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
 | **vLLM** | Fast LLM inference with PagedAttention | CUDA 12/13, ROCm, Intel |
-| **transformers** | HuggingFace transformers framework | CUDA 11/12/13, ROCm, Intel, CPU |
+| **transformers** | HuggingFace transformers framework | CUDA 12/13, ROCm, Intel, CPU |
 | **exllama2** | GPTQ inference library | CUDA 12/13 |
 | **MLX** | Apple Silicon LLM inference | Metal (M1/M2/M3+) |
 | **MLX-VLM** | Apple Silicon Vision-Language Models | Metal (M1/M2/M3+) |
@@ -295,7 +289,7 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | **bark-cpp** | C++ implementation of Bark | CUDA, Metal, CPU |
 | **coqui** | Advanced TTS with 1100+ languages | CUDA 12/13, ROCm, Intel, CPU |
 | **kokoro** | Lightweight TTS model | CUDA 12/13, ROCm, Intel, CPU |
-| **chatterbox** | Production-grade TTS | CUDA 11/12/13, CPU |
+| **chatterbox** | Production-grade TTS | CUDA 12/13, CPU |
 | **piper** | Fast neural TTS system | CPU |
 | **kitten-tts** | Kitten TTS models | CPU |
 | **silero-vad** | Voice Activity Detection | CPU |
@@ -306,13 +300,13 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
 | **stablediffusion.cpp** | Stable Diffusion in C/C++ | CUDA 12/13, Intel SYCL, Vulkan, CPU |
-| **diffusers** | HuggingFace diffusion models | CUDA 11/12/13, ROCm, Intel, Metal, CPU |
+| **diffusers** | HuggingFace diffusion models | CUDA 12/13, ROCm, Intel, Metal, CPU |
 
 ### Specialized AI Tasks
 
 | Backend | Description | Acceleration Support |
 |---------|-------------|---------------------|
 | **rfdetr** | Real-time object detection | CUDA 12/13, Intel, CPU |
-| **rerankers** | Document reranking API | CUDA 11/12/13, ROCm, Intel, CPU |
+| **rerankers** | Document reranking API | CUDA 12/13, ROCm, Intel, CPU |
 | **local-store** | Vector database | CPU |
 | **huggingface** | HuggingFace API integration | API-based |
@@ -320,7 +314,6 @@ LocalAI supports a comprehensive range of AI backends with multiple acceleration
 | Acceleration Type | Supported Backends | Hardware Support |
 |-------------------|-------------------|------------------|
-| **NVIDIA CUDA 11** | llama.cpp, whisper, stablediffusion, diffusers, rerankers, bark, chatterbox | Nvidia hardware |
 | **NVIDIA CUDA 12** | All CUDA-compatible backends | Nvidia hardware |
 | **NVIDIA CUDA 13** | All CUDA-compatible backends | Nvidia hardware |
 | **AMD ROCm** | llama.cpp, whisper, vllm, transformers, diffusers, rerankers, coqui, kokoro, bark, neutts, vibevoice | AMD Graphics |
diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang
index 1db39c9e1d63..7843d2ba6b8d 100644
--- a/backend/Dockerfile.golang
+++ b/backend/Dockerfile.golang
@@ -1,4 +1,4 @@
-ARG BASE_IMAGE=ubuntu:22.04
+ARG BASE_IMAGE=ubuntu:24.04
 
 FROM ${BASE_IMAGE} AS builder
 ARG BACKEND=rerankers
@@ -12,8 +12,8 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 ENV DEBIAN_FRONTEND=noninteractive
 ARG TARGETARCH
 ARG TARGETVARIANT
-ARG GO_VERSION=1.22.6
-ARG UBUNTU_VERSION=2204
+ARG GO_VERSION=1.25.4
+ARG UBUNTU_VERSION=2404
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
@@ -40,11 +40,30 @@
RUN <=1.24.0,<1.26.0
-# https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
-chatterbox-tts@git+https://git@github.com/mudler/chatterbox.git@faster
-accelerate
\ No newline at end of file
diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt
index 6c21992a7585..ed30fb824107 100644
--- a/backend/python/chatterbox/requirements-hipblas.txt
+++ b/backend/python/chatterbox/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.6.0+rocm6.1
-torchaudio==2.6.0+rocm6.1
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.9.1+rocm6.4
+torchaudio==2.9.1+rocm6.4
 transformers
 numpy>=1.24.0,<1.26.0
 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289
diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh
index 9af6ca6736f5..eb55f43d9547 100644
--- a/backend/python/common/libbackend.sh
+++ b/backend/python/common/libbackend.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euo pipefail
-# 
+#
 # use the library by adding the following line to a script:
 # source $(dirname $0)/../common/libbackend.sh
 #
@@ -206,8 +206,8 @@ function init() {
 # getBuildProfile will inspect the system to determine which build profile is appropriate:
 # returns one of the following:
-# - cublas11
 # - cublas12
+# - cublas13
 # - hipblas
 # - intel
 function getBuildProfile() {
@@ -392,7 +392,7 @@ function runProtogen() {
 # - requirements-${BUILD_TYPE}.txt
 # - requirements-${BUILD_PROFILE}.txt
 #
-# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-11 or cuda-12
+# BUILD_PROFILE is a more specific version of BUILD_TYPE, ex: cuda-12 or cuda-13
 # it can also include some options that we do not have BUILD_TYPES for, ex: intel
 #
 # NOTE: for BUILD_PROFILE==intel, this function does NOT automatically use the Intel python package index.
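For context, a minimal sketch of the profile selection those comments describe; the function name and return values come from libbackend.sh above, but the detection details here are assumptions (the real implementation inspects the system):

```bash
# Illustrative only: map BUILD_TYPE plus the CUDA major version onto the
# BUILD_PROFILE names documented above (cublas12, cublas13, hipblas, intel).
function getBuildProfile() {
    case "${BUILD_TYPE:-}" in
        cublas)
            # BUILD_PROFILE is the more specific form of BUILD_TYPE,
            # e.g. cublas12 or cublas13 depending on the toolkit found.
            echo "cublas${CUDA_MAJOR_VERSION:-12}"
            ;;
        hipblas) echo "hipblas" ;;
        intel)   echo "intel" ;;
        *)       echo "${BUILD_TYPE:-}" ;;
    esac
}
```

The profile then selects which `requirements-${BUILD_PROFILE}.txt` file gets installed, which is why the per-profile files below exist.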
diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt
index 76018445f448..b733ec7b148b 100644
--- a/backend/python/common/template/requirements-hipblas.txt
+++ b/backend/python/common/template/requirements-hipblas.txt
@@ -1,2 +1,2 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 torch
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt
deleted file mode 100644
index 97e1ef0a4afe..000000000000
--- a/backend/python/coqui/requirements-cublas11.txt
+++ /dev/null
@@ -1,6 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-torchaudio==2.4.1+cu118
-transformers==4.48.3
-accelerate
-coqui-tts
\ No newline at end of file
diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt
index 55cdcdddb845..8e7d034591e3 100644
--- a/backend/python/coqui/requirements-hipblas.txt
+++ b/backend/python/coqui/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-torchaudio==2.4.1+rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchaudio==2.8.0+rocm6.4
 transformers==4.48.3
 accelerate
 coqui-tts
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt
deleted file mode 100644
index 7b77f7f68693..000000000000
--- a/backend/python/diffusers/requirements-cublas11.txt
+++ /dev/null
@@ -1,12 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-git+https://github.com/huggingface/diffusers
-opencv-python
-transformers
-torchvision==0.22.1
-accelerate
-compel
-peft
-sentencepiece
-torch==2.7.1
-optimum-quanto
-ftfy
\ No newline at end of file
diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt
index aeea375639a0..b1f8b3e048c5 100644
--- a/backend/python/diffusers/requirements-hipblas.txt
+++ b/backend/python/diffusers/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchvision==0.22.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchvision==0.23.0+rocm6.4
 git+https://github.com/huggingface/diffusers
 opencv-python
 transformers
diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt
deleted file mode 100644
index 2d1958c75153..000000000000
--- a/backend/python/exllama2/requirements-cublas11.txt
+++ /dev/null
@@ -1,4 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-transformers
-accelerate
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-cublas11.txt b/backend/python/faster-whisper/requirements-cublas11.txt
deleted file mode 100644
index b74532957a4a..000000000000
--- a/backend/python/faster-whisper/requirements-cublas11.txt
+++ /dev/null
@@ -1,9 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-faster-whisper
-opencv-python
-accelerate
-compel
-peft
-sentencepiece
-optimum-quanto
\ No newline at end of file
diff --git a/backend/python/faster-whisper/requirements-hipblas.txt b/backend/python/faster-whisper/requirements-hipblas.txt
index 29413f0508b3..da9c9123c0d7 100644
--- a/backend/python/faster-whisper/requirements-hipblas.txt
+++ b/backend/python/faster-whisper/requirements-hipblas.txt
@@ -1,3 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 torch
 faster-whisper
\ No newline at end of file
diff --git a/backend/python/kokoro/requirements-cublas11.txt b/backend/python/kokoro/requirements-cublas11.txt
deleted file mode 100644
index 628933b5640a..000000000000
--- a/backend/python/kokoro/requirements-cublas11.txt
+++ /dev/null
@@ -1,7 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-torchaudio==2.7.1+cu118
-transformers
-accelerate
-kokoro
-soundfile
\ No newline at end of file
diff --git a/backend/python/kokoro/requirements-hipblas.txt b/backend/python/kokoro/requirements-hipblas.txt
index 1226d917447b..74262df5c3ce 100644
--- a/backend/python/kokoro/requirements-hipblas.txt
+++ b/backend/python/kokoro/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchaudio==2.7.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchaudio==2.8.0+rocm6.4
 transformers
 accelerate
 kokoro
diff --git a/backend/python/neutts/requirements-hipblas.txt b/backend/python/neutts/requirements-hipblas.txt
index 012d3c8bf6f5..72d11e059817 100644
--- a/backend/python/neutts/requirements-hipblas.txt
+++ b/backend/python/neutts/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.8.0+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
 transformers==4.56.1
 accelerate
 librosa==0.11.0
diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt
deleted file mode 100644
index fef296fe8bb3..000000000000
--- a/backend/python/rerankers/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-transformers
-accelerate
-torch==2.4.1+cu118
-rerankers[transformers]
\ No newline at end of file
diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt
index b1c8baeddfe9..7a72b3d0650f 100644
--- a/backend/python/rerankers/requirements-hipblas.txt
+++ b/backend/python/rerankers/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
 transformers
 accelerate
-torch==2.4.1+rocm6.0
+torch==2.8.0+rocm6.4
 rerankers[transformers]
\ No newline at end of file
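As these requirement files illustrate, the ROCm wheels are resolved through PyTorch's per-release package index rather than PyPI. A hedged example of what pip effectively does with one of the files above (versions are the ones pinned in the rerankers hipblas file):

```bash
# torch resolves from the rocm6.4 index first; the remaining packages
# fall back to PyPI.
pip install \
  --extra-index-url https://download.pytorch.org/whl/rocm6.4 \
  "torch==2.8.0+rocm6.4" transformers accelerate "rerankers[transformers]"
```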
diff --git a/backend/python/rfdetr/requirements-cublas11.txt b/backend/python/rfdetr/requirements-cublas11.txt
deleted file mode 100644
index 14449b3d4b00..000000000000
--- a/backend/python/rfdetr/requirements-cublas11.txt
+++ /dev/null
@@ -1,8 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-rfdetr
-opencv-python
-accelerate
-inference
-peft
-optimum-quanto
\ No newline at end of file
diff --git a/backend/python/rfdetr/requirements-hipblas.txt b/backend/python/rfdetr/requirements-hipblas.txt
index 536a31efb509..884cfba7be46 100644
--- a/backend/python/rfdetr/requirements-hipblas.txt
+++ b/backend/python/rfdetr/requirements-hipblas.txt
@@ -1,6 +1,6 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
-torchvision==0.22.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
+torchvision==0.23.0+rocm6.4
 rfdetr
 opencv-python
 accelerate
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
deleted file mode 100644
index 8402f001e3cd..000000000000
--- a/backend/python/transformers/requirements-cublas11.txt
+++ /dev/null
@@ -1,10 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.7.1+cu118
-llvmlite==0.43.0
-numba==0.60.0
-accelerate
-transformers
-bitsandbytes
-outetts
-sentence-transformers==5.2.0
-protobuf==6.33.2
\ No newline at end of file
diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt
index 732a3adfcdc4..59f99e3643fa 100644
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -1,5 +1,5 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3
+--extra-index-url https://download.pytorch.org/whl/rocm6.4
+torch==2.8.0+rocm6.4
 accelerate
 transformers
 llvmlite==0.43.0
diff --git a/backend/python/vibevoice/requirements-cublas11.txt b/backend/python/vibevoice/requirements-cublas11.txt
deleted file mode 100644
index 547b198aa870..000000000000
--- a/backend/python/vibevoice/requirements-cublas11.txt
+++ /dev/null
@@ -1,22 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-git+https://github.com/huggingface/diffusers
-opencv-python
-transformers==4.51.3
-torchvision==0.22.1
-accelerate
-compel
-peft
-sentencepiece
-torch==2.7.1
-optimum-quanto
-ftfy
-llvmlite>=0.40.0
-numba>=0.57.0
-tqdm
-numpy
-scipy
-librosa
-ml-collections
-absl-py
-gradio
-av
\ No newline at end of file
diff --git a/backend/python/vllm/install.sh b/backend/python/vllm/install.sh
index 364ff7a41b99..7dcd29db4a92 100755
--- a/backend/python/vllm/install.sh
+++ b/backend/python/vllm/install.sh
@@ -28,7 +28,7 @@ fi
 # We don't embed this into the images as it is a large dependency and not always needed.
 # Besides, the speed inference are not actually usable in the current state for production use-cases.
-if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE}" == "xtrue" ]; then
+if [ "x${BUILD_TYPE}" == "x" ] && [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
     ensureVenv
     # https://docs.vllm.ai/en/v0.6.1/getting_started/cpu-installation.html
     if [ ! -d vllm ]; then
diff --git a/backend/python/vllm/requirements-cublas11-after.txt b/backend/python/vllm/requirements-cublas11-after.txt
deleted file mode 100644
index 7bfe8efeb555..000000000000
--- a/backend/python/vllm/requirements-cublas11-after.txt
+++ /dev/null
@@ -1 +0,0 @@
-flash-attn
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt
deleted file mode 100644
index 95fdd7aea702..000000000000
--- a/backend/python/vllm/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-accelerate
-torch==2.7.0+cu118
-transformers
-bitsandbytes
\ No newline at end of file
diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt
index 3a65e0d7c907..db732bc864ef 100644
--- a/backend/python/vllm/requirements-hipblas.txt
+++ b/backend/python/vllm/requirements-hipblas.txt
@@ -1,4 +1,4 @@
---extra-index-url https://download.pytorch.org/whl/nightly/rocm6.3
+--extra-index-url https://download.pytorch.org/whl/nightly/rocm6.4
 accelerate
 torch
 transformers
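The `${FROM_SOURCE:-}` change in install.sh above matters because libbackend.sh (which these install scripts source) runs under `set -euo pipefail`: with `set -u`, expanding an unset variable aborts the script, while the `:-` form substitutes an empty string. A small standalone illustration, not LocalAI code:

```bash
#!/usr/bin/env bash
set -euo pipefail

# Without a default, this line aborts the script when FROM_SOURCE is unset:
#   [ "x${FROM_SOURCE}" == "xtrue" ]   # -> "FROM_SOURCE: unbound variable"

# The ${VAR:-} form expands to "" instead, so the test degrades gracefully:
if [ "x${FROM_SOURCE:-}" == "xtrue" ]; then
    echo "building vLLM from source"
else
    echo "using prebuilt vLLM"
fi
```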
diff --git a/docker-compose.yaml b/docker-compose.yaml
index b9880352ad8f..765a3fb63b2e 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -11,7 +11,7 @@ services:
       dockerfile: Dockerfile
       args:
         - IMAGE_TYPE=core
-        - BASE_IMAGE=ubuntu:22.04
+        - BASE_IMAGE=ubuntu:24.04
     ports:
       - 8080:8080
     env_file:
diff --git a/docs/content/getting-started/container-images.md b/docs/content/getting-started/container-images.md
index 5f4db3929853..7ea98965de4f 100644
--- a/docs/content/getting-started/container-images.md
+++ b/docs/content/getting-started/container-images.md
@@ -50,16 +50,6 @@ Standard container images do not have pre-installed models. Use these if you wan
 
 {{% /tab %}}
 
-{{% tab title="GPU Images CUDA 11" %}}
-
-| Description | Quay | Docker Hub |
-| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda-11` | `localai/localai:master-gpu-nvidia-cuda-11` |
-| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda-11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda-11` |
-
-{{% /tab %}}
-
 {{% tab title="GPU Images CUDA 12" %}}
 
 | Description | Quay | Docker Hub |
@@ -169,11 +159,9 @@ services:
     image: localai/localai:latest-aio-cpu
     # For a specific version:
     # image: localai/localai:{{< version >}}-aio-cpu
-    # For Nvidia GPUs decomment one of the following (cuda11, cuda12, or cuda13):
-    # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-11
+    # For Nvidia GPUs decomment one of the following (cuda12 or cuda13):
     # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-12
     # image: localai/localai:{{< version >}}-aio-gpu-nvidia-cuda-13
-    # image: localai/localai:latest-aio-gpu-nvidia-cuda-11
     # image: localai/localai:latest-aio-gpu-nvidia-cuda-12
     # image: localai/localai:latest-aio-gpu-nvidia-cuda-13
     healthcheck:
@@ -225,7 +213,6 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/lo
 | --- | --- |-----------------------------------------------|
 | Latest images for CPU | `quay.io/go-skynet/local-ai:latest-aio-cpu` | `localai/localai:latest-aio-cpu` |
 | Versioned image (e.g. for CPU) | `quay.io/go-skynet/local-ai:{{< version >}}-aio-cpu` | `localai/localai:{{< version >}}-aio-cpu` |
-| Latest images for Nvidia GPU (CUDA11) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-11` | `localai/localai:latest-aio-gpu-nvidia-cuda-11` |
 | Latest images for Nvidia GPU (CUDA12) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12` | `localai/localai:latest-aio-gpu-nvidia-cuda-12` |
 | Latest images for Nvidia GPU (CUDA13) | `quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-13` | `localai/localai:latest-aio-gpu-nvidia-cuda-13` |
 | Latest images for AMD GPU | `quay.io/go-skynet/local-ai:latest-aio-gpu-hipblas` | `localai/localai:latest-aio-gpu-hipblas` |
diff --git a/docs/content/installation/docker.md b/docs/content/installation/docker.md
index 1a3ea706c551..7cb354f98a8a 100644
--- a/docs/content/installation/docker.md
+++ b/docs/content/installation/docker.md
@@ -68,11 +68,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gp
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-12
 ```
 
-**NVIDIA CUDA 11:**
-```bash
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-gpu-nvidia-cuda-11
-```
-
 **AMD GPU (ROCm):**
 ```bash
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-gpu-hipblas
@@ -122,11 +117,6 @@ docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-ai
 docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-12
 ```
 
-**NVIDIA CUDA 11:**
-```bash
-docker run -ti --name local-ai -p 8080:8080 --gpus all localai/localai:latest-aio-gpu-nvidia-cuda-11
-```
-
 **AMD GPU (ROCm):**
 ```bash
 docker run -ti --name local-ai -p 8080:8080 --device=/dev/kfd --device=/dev/dri --group-add=video localai/localai:latest-aio-gpu-hipblas
diff --git a/docs/content/reference/compatibility-table.md b/docs/content/reference/compatibility-table.md
index b34b3d452711..97bc61313dc0 100644
--- a/docs/content/reference/compatibility-table.md
+++ b/docs/content/reference/compatibility-table.md
@@ -18,9 +18,9 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
-| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 11/12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
+| [llama.cpp]({{%relref "features/text-generation#llama.cpp" %}}) | LLama, Mamba, RWKV, Falcon, Starcoder, GPT-2, [and many others](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) | yes | GPT and Functions | yes | yes | CUDA 12/13, ROCm, Intel SYCL, Vulkan, Metal, CPU |
 | [vLLM](https://github.com/vllm-project/vllm) | Various GPTs and quantization formats | yes | GPT | no | no | CUDA 12/13, ROCm, Intel |
-| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 11/12/13, ROCm, Intel, CPU |
+| [transformers](https://github.com/huggingface/transformers) | Various GPTs and quantization formats | yes | GPT, embeddings, Audio generation | yes | yes* | CUDA 12/13, ROCm, Intel, CPU |
 | [exllama2](https://github.com/turboderp-org/exllamav2) | GPTQ | yes | GPT only | no | no | CUDA 12/13 |
 | [MLX](https://github.com/ml-explore/mlx-lm) | Various LLMs | yes | GPT | no | no | Metal (Apple Silicon) |
 | [MLX-VLM](https://github.com/Blaizzy/mlx-vlm) | Vision-Language Models | yes | Multimodal GPT | no | no | Metal (Apple Silicon) |
@@ -37,7 +37,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | [bark-cpp](https://github.com/PABannier/bark.cpp) | bark | no | Audio-Only | no | no | CUDA, Metal, CPU |
 | [coqui](https://github.com/idiap/coqui-ai-TTS) | Coqui TTS | no | Audio generation and Voice cloning | no | no | CUDA 12/13, ROCm, Intel, CPU |
 | [kokoro](https://github.com/hexgrad/kokoro) | Kokoro TTS | no | Text-to-speech | no | no | CUDA 12/13, ROCm, Intel, CPU |
-| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 11/12/13, CPU |
+| [chatterbox](https://github.com/resemble-ai/chatterbox) | Chatterbox TTS | no | Text-to-speech | no | no | CUDA 12/13, CPU |
 | [kitten-tts](https://github.com/KittenML/KittenTTS) | Kitten TTS | no | Text-to-speech | no | no | CPU |
 | [silero-vad](https://github.com/snakers4/silero-vad) with [Golang bindings](https://github.com/streamer45/silero-vad-go) | Silero VAD | no | Voice Activity Detection | no | no | CPU |
 | [neutts](https://github.com/neuphonic/neuttsair) | NeuTTSAir | no | Text-to-speech with voice cloning | no | no | CUDA 12/13, ROCm, CPU |
@@ -49,7 +49,7 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
 | [stablediffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) | stablediffusion-1, stablediffusion-2, stablediffusion-3, flux, PhotoMaker | no | Image | no | no | CUDA 12/13, Intel SYCL, Vulkan, CPU |
-| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 11/12/13, ROCm, Intel, Metal, CPU |
+| [diffusers](https://github.com/huggingface/diffusers) | SD, various diffusion models,... | no | Image/Video generation | no | no | CUDA 12/13, ROCm, Intel, Metal, CPU |
 | [transformers-musicgen](https://github.com/huggingface/transformers) | MusicGen | no | Audio generation | no | no | CUDA, CPU |
 
 ## Specialized AI Tasks
 
@@ -57,14 +57,14 @@ LocalAI will attempt to automatically load models which are not explicitly confi
 | Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
 |----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
 | [rfdetr](https://github.com/roboflow/rf-detr) | RF-DETR | no | Object Detection | no | no | CUDA 12/13, Intel, CPU |
-| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 11/12/13, ROCm, Intel, CPU |
+| [rerankers](https://github.com/AnswerDotAI/rerankers) | Reranking API | no | Reranking | no | no | CUDA 12/13, ROCm, Intel, CPU |
 | [local-store](https://github.com/mudler/LocalAI) | Vector database | no | Vector storage | yes | no | CPU |
 | [huggingface](https://huggingface.co/docs/hub/en/api) | HuggingFace API models | yes | Various AI tasks | yes | yes | API-based |
 
 ## Acceleration Support Summary
 
 ### GPU Acceleration
-- **NVIDIA CUDA**: CUDA 11.7, CUDA 12.0, CUDA 13.0 support across most backends
 - **AMD ROCm**: HIP-based acceleration for AMD GPUs
 - **Intel oneAPI**: SYCL-based acceleration for Intel GPUs (F16/F32 precision)
 - **Vulkan**: Cross-platform GPU acceleration