diff --git a/Dockerfile b/Dockerfile
index a253237c974c..28147e75b856 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -142,7 +142,7 @@ ENV PATH=/opt/rocm/bin:${PATH}
 FROM requirements-drivers AS build-requirements
 ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
+ARG CMAKE_VERSION=3.31.10
 ARG CMAKE_FROM_SOURCE=false
 ARG TARGETARCH
 ARG TARGETVARIANT
diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp
index 8d5e0b7fbdb0..7990720af00f 100644
--- a/backend/Dockerfile.llama-cpp
+++ b/backend/Dockerfile.llama-cpp
@@ -10,7 +10,8 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.65.0
 ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
+# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues
+ARG CMAKE_VERSION=3.31.10
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}
@@ -26,7 +27,7 @@ RUN apt-get update && \
 # Install CMake (the version in 22.04 is too old)
 RUN </dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
 ARCH?=$(shell uname -m)
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
@@ -112,7 +112,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
 else
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
 endif
 	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
@@ -125,7 +125,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
 else
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
 endif
 	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
@@ -138,7 +138,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
 else
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
 endif
 	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
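
Note on the Makefile hunks above: rather than forcing BMI2 off through raw compiler flags (-mno-bmi/-mno-bmi2 injected via CMAKE_C_FLAGS/CMAKE_CXX_FLAGS), the variants now pass ggml's own GGML_BMI2 toggle, matching the other CPU-feature switches (GGML_AVX, GGML_FMA, etc.) already in use. A minimal standalone sketch of an equivalent configure step, assuming a plain llama.cpp checkout in ./llama.cpp and a ./build directory (both illustrative, not paths from this repo):

# Hypothetical configure mirroring the fallback variant's flags after this change;
# -DGGML_BMI2=off replaces the old -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' /
# -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2' workaround.
cmake -S llama.cpp -B build \
  -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off \
  -DGGML_FMA=off -DGGML_F16C=off \
  -DGGML_BMI2=off
cmake --build build -j"$(nproc)"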