
Commit 5add7b4

fix: BMI2 crash on AVX-only CPUs (Intel Ivy Bridge/Sandy Bridge) (#7864)
* Fix BMI2 crash on AVX-only CPUs (Intel Ivy Bridge/Sandy Bridge)

  Signed-off-by: coffeerunhobby <coffeerunhobby@users.noreply.github.com>

* Address feedback from review

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

---------

Signed-off-by: coffeerunhobby <coffeerunhobby@users.noreply.github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: coffeerunhobby <coffeerunhobby@users.noreply.github.com>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
1 parent 3244ccc commit 5add7b4
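Background for the change: Intel Sandy Bridge and Ivy Bridge CPUs advertise AVX but not BMI2, so a grpc-server binary that contains BMI2 instructions (pdep/pext) dies with an illegal-instruction fault on them. The x86 avx/fallback/grpc llama.cpp variants previously tried to avoid this with the -mno-bmi/-mno-bmi2 compiler flags; this commit switches them to ggml's own -DGGML_BMI2=off option instead (see the Makefile hunks below). A minimal check, not part of the commit and assuming a Linux host with /proc/cpuinfo, for whether the local CPU actually advertises BMI2:

# Sketch (illustration only): does this CPU advertise BMI2?
if grep -qw bmi2 /proc/cpuinfo; then
    echo "BMI2 present: BMI2-enabled builds are safe on this host"
else
    echo "No BMI2 (e.g. Sandy Bridge/Ivy Bridge): use a build configured with -DGGML_BMI2=off"
fi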

File tree

4 files changed: +42 -22 lines

Dockerfile
backend/Dockerfile.llama-cpp
backend/cpp/llama-cpp/CMakeLists.txt
backend/cpp/llama-cpp/Makefile

Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -142,7 +142,7 @@ ENV PATH=/opt/rocm/bin:${PATH}
 FROM requirements-drivers AS build-requirements
 
 ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
+ARG CMAKE_VERSION=3.31.10
 ARG CMAKE_FROM_SOURCE=false
 ARG TARGETARCH
 ARG TARGETVARIANT

backend/Dockerfile.llama-cpp

Lines changed: 36 additions & 16 deletions
@@ -10,7 +10,8 @@ FROM ${GRPC_BASE_IMAGE} AS grpc
 ARG GRPC_MAKEFLAGS="-j4 -Otarget"
 ARG GRPC_VERSION=v1.65.0
 ARG CMAKE_FROM_SOURCE=false
-ARG CMAKE_VERSION=3.26.4
+# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues
+ARG CMAKE_VERSION=3.31.10
 
 ENV MAKEFLAGS=${GRPC_MAKEFLAGS}

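The new comment records why the CMake floor moved: CUDA Toolkit 13.x needs the toolchain-detection and architecture-table fixes that landed in CMake 3.31.9. A small sketch, not part of the commit, that checks whether the cmake on PATH meets that minimum using a sort -V comparison:

# Sketch (illustration only): verify cmake >= 3.31.9 before a CUDA 13.x build.
required=3.31.9
have="$(cmake --version | head -n1 | awk '{print $3}')"
if [ "$(printf '%s\n' "$required" "$have" | sort -V | head -n1)" != "$required" ]; then
    echo "cmake $have is too old for CUDA Toolkit 13.x (need >= $required)" >&2
    exit 1
fi
echo "cmake $have is new enough"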
@@ -26,7 +27,7 @@ RUN apt-get update && \
 
 # Install CMake (the version in 22.04 is too old)
 RUN <<EOT bash
-if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
+if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
 curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
 else
 apt-get update && \
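The only change in this hunk is dropping a stray closing brace: "${CMAKE_FROM_SOURCE}}" expands to the variable's value followed by a literal }, so the test against "true" could never succeed and the from-source branch was unreachable. A two-line illustration, not from the commit:

CMAKE_FROM_SOURCE=true
[ "${CMAKE_FROM_SOURCE}}" = "true" ] && echo from-source || echo apt   # prints "apt": the expansion is "true}"
[ "${CMAKE_FROM_SOURCE}" = "true" ] && echo from-source || echo apt    # prints "from-source"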
@@ -50,6 +51,13 @@ RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shall
 rm -rf /build
 
 FROM ${BASE_IMAGE} AS builder
+ARG CMAKE_FROM_SOURCE=false
+ARG CMAKE_VERSION=3.31.10
+# We can target specific CUDA ARCHITECTURES like --build-arg CUDA_DOCKER_ARCH='75;86;89;120'
+ARG CUDA_DOCKER_ARCH
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+ARG CMAKE_ARGS
+ENV CMAKE_ARGS=${CMAKE_ARGS}
 ARG BACKEND=rerankers
 ARG BUILD_TYPE
 ENV BUILD_TYPE=${BUILD_TYPE}
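The new CUDA_DOCKER_ARCH build argument makes it possible to compile the CUDA backend for a narrower set of compute capabilities than the default, as the added comment shows. A usage sketch, where the image tag and the BUILD_TYPE value are assumptions rather than anything taken from this commit:

# Build only for compute capabilities 7.5 and 8.9 instead of the full default set.
docker build \
    --build-arg CUDA_DOCKER_ARCH='75;89' \
    --build-arg BUILD_TYPE=cublas \
    -f backend/Dockerfile.llama-cpp \
    -t local-ai-llama-cpp-backend .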
@@ -70,6 +78,7 @@ RUN apt-get update && \
 ccache git \
 ca-certificates \
 make \
+pkg-config libcurl4-openssl-dev \
 curl unzip \
 libssl-dev wget && \
 apt-get clean && \
@@ -189,7 +198,7 @@ EOT
 
 # Install CMake (the version in 22.04 is too old)
 RUN <<EOT bash
-if [ "${CMAKE_FROM_SOURCE}}" = "true" ]; then
+if [ "${CMAKE_FROM_SOURCE}" = "true" ]; then
 curl -L -s https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}.tar.gz -o cmake.tar.gz && tar xvf cmake.tar.gz && cd cmake-${CMAKE_VERSION} && ./configure && make && make install
 else
 apt-get update && \
@@ -205,19 +214,30 @@ COPY --from=grpc /opt/grpc /usr/local
 
 COPY . /LocalAI
 
-## Otherwise just run the normal build
-RUN <<EOT bash
-if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then \
-cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-fallback && \
-make llama-cpp-grpc && make llama-cpp-rpc-server; \
-else \
-cd /LocalAI/backend/cpp/llama-cpp && make llama-cpp-avx && \
-make llama-cpp-avx2 && \
-make llama-cpp-avx512 && \
-make llama-cpp-fallback && \
-make llama-cpp-grpc && \
-make llama-cpp-rpc-server; \
-fi
+RUN <<'EOT' bash
+set -euxo pipefail
+
+if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
+CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
+export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
+echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
+rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
+fi
+
+if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
+cd /LocalAI/backend/cpp/llama-cpp
+make llama-cpp-fallback
+make llama-cpp-grpc
+make llama-cpp-rpc-server
+else
+cd /LocalAI/backend/cpp/llama-cpp
+make llama-cpp-avx
+make llama-cpp-avx2
+make llama-cpp-avx512
+make llama-cpp-fallback
+make llama-cpp-grpc
+make llama-cpp-rpc-server
+fi
 EOT
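The rewritten RUN step above escapes the semicolons in CUDA_DOCKER_ARCH (${CUDA_DOCKER_ARCH//;/\\;}) before appending -DCMAKE_CUDA_ARCHITECTURES to CMAKE_ARGS. The likely reason, inferred from the diff rather than stated in it, is that CMake lists are ;-separated and the value is later re-expanded unquoted in Makefile recipes, where a bare ; would end the shell command. A quick illustration of the substitution:

# Sketch (illustration only): what the //;/\; replacement produces.
CUDA_DOCKER_ARCH='75;86;89;120'
CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
echo "$CUDA_ARCH_ESC"                                # 75\;86\;89\;120
echo "-DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"   # the semicolons stay literal when re-expanded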
backend/cpp/llama-cpp/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -70,4 +70,4 @@ target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_IN
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
 add_dependencies(${TARGET} BUILD_INFO)
-endif()
\ No newline at end of file
+endif()

backend/cpp/llama-cpp/Makefile

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@ BUILD_TYPE?=
 NATIVE?=false
 ONEAPI_VARS?=/opt/intel/oneapi/setvars.sh
 TARGET?=--target grpc-server
-JOBS?=$(shell nproc)
+JOBS?=$(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)
 ARCH?=$(shell uname -m)
 
 # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static
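JOBS now falls back from nproc (Linux) to sysctl -n hw.ncpu (macOS/BSD) and finally to a single job, instead of failing where nproc does not exist. The same chain, runnable on its own outside make:

# Sketch (illustration only): portable parallel-job count.
jobs="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1)"
echo "parallel build jobs: $jobs"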
@@ -112,7 +112,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
 else
-CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
 endif
 cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx

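In this hunk and the two below, the x86 builds that must avoid BMI2 stop passing -mno-bmi/-mno-bmi2 through CMAKE_C_FLAGS/CMAKE_CXX_FLAGS and instead set ggml's own -DGGML_BMI2=off, disabling the feature at the ggml configuration level rather than only at the compiler level (that rationale is inferred from the change, not spelled out in the commit). To rebuild just the BMI2-free AVX variant by hand:

# Usage sketch: rebuild the AVX-only variant locally.
cd backend/cpp/llama-cpp
make llama-cpp-avx    # the recipe now configures ggml with -DGGML_BMI2=off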
@@ -125,7 +125,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
 else
-CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
 endif
 cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback

@@ -138,7 +138,7 @@ ifeq ($(OS),Darwin)
 else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64))
 CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
 else
-CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
+CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
 endif
 cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
