Skip to content
Draft
70 changes: 70 additions & 0 deletions .github/workflows/test-cpu-variants.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Manually triggered workflow. It builds ggml on an Arm runner with all CPU variants plus the
# reference CPU backend enabled, lists the variants that were built, and - when a variant is given -
# runs test-backend-ops against that variant for the requested operation.
name: Test CPU Variants

on:
  workflow_dispatch:
    inputs:
      operation:
        description: 'Operation to test (e.g., MUL_MAT or full spec)'
        required: false
        default: 'MUL_MAT'
        type: string
      variant:
        description: 'CPU variant to test (leave empty to list available variants)'
        required: false
        default: ''
        type: string

jobs:
  test-cpu-variant-sve:
    runs-on: ubuntu-24.04-arm
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
          sudo apt-get update
          # -y keeps the install non-interactive even if the runner image does not preconfigure apt
          sudo apt-get install -y build-essential gcc-14 g++-14
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
          gcc --version

      - name: Build with CPU reference backend
        run: |
          cmake -B build -S . \
            -DGGML_CPU_REF_BACKEND=ON \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGGML_CPU_REPACK=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_BACKEND_DL=ON \
            -DGGML_BLAS=OFF \
            -DLLAMA_CURL=OFF \
            -DCMAKE_BUILD_TYPE=Release

          # use every core the runner offers instead of a fixed job count
          cmake --build build -j "$(nproc)"

      - name: List available CPU variants
        run: |
          echo "Available CPU variants:"
          ./build/bin/test-backend-ops cpu-variants --list

      - name: Test CPU variant
        if: ${{ inputs.variant != '' }}
        # Pass the user-supplied inputs through the environment instead of expanding them inside the
        # script text: a value containing quotes, spaces or shell metacharacters would otherwise be
        # interpreted by the shell.
        env:
          VARIANT: ${{ inputs.variant }}
          OPERATION: ${{ inputs.operation }}
        run: |
          echo "Testing variant: ${VARIANT}"
          echo "Operation: ${OPERATION}"
          ./build/bin/test-backend-ops cpu-variants \
            --variant "${VARIANT}" \
            -o "${OPERATION}"

      - name: Instructions
        if: ${{ inputs.variant == '' }}
        run: |
          echo "=========================================="
          echo "No variant specified - only listed available variants above"
          echo "To test a specific variant, re-run this workflow with:"
          echo " - variant: one of the variants listed above"
          echo " - operation: your operation string (default: MUL_MAT)"
          echo "=========================================="
9 changes: 6 additions & 3 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,9 @@ set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

# extra artifacts
# Each option is declared exactly once.
# Tests and examples default to ${GGML_STANDALONE}; the reference CPU backend is opt-in.
option(GGML_BUILD_TESTS     "ggml: build tests"                             ${GGML_STANDALONE})
option(GGML_CPU_REF_BACKEND "ggml: build reference CPU backend for testing" OFF)
option(GGML_BUILD_EXAMPLES  "ggml: build examples"                          ${GGML_STANDALONE})

#
# dependencies
Expand Down Expand Up @@ -283,7 +284,9 @@ add_subdirectory(src)

# Enable CTest when tests are requested; add the tests/ subdirectory only if it exists in this
# source tree. The subdirectory is added once - adding the same directory twice is a CMake error.
if (GGML_BUILD_TESTS)
    enable_testing()
    if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/tests")
        add_subdirectory(tests)
    endif ()
endif ()

if (GGML_BUILD_EXAMPLES)
Expand Down
3 changes: 3 additions & 0 deletions ggml/include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ extern "C" {
// Load all known backends from dynamic libraries
GGML_API void ggml_backend_load_all(void);
GGML_API void ggml_backend_load_all_from_path(const char * dir_path);
// Load and register every variant of the backend `name`, i.e. each [lib]ggml-<name>-*.[so|dll] file found in the search paths
GGML_API void ggml_backend_load_all_variants(const char * name);
// Load and register a single variant of the backend `name`: the first [lib]ggml-<name>-<variant>.[so|dll] in the search paths that loads successfully
GGML_API void ggml_backend_load_variant(const char * name, const char * variant);

//
// Backend scheduler
Expand Down
1 change: 1 addition & 0 deletions ggml/include/ggml-cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ extern "C" {
//

// x86
GGML_BACKEND_API int ggml_cpu_has_sse2 (void);
GGML_BACKEND_API int ggml_cpu_has_sse3 (void);
GGML_BACKEND_API int ggml_cpu_has_ssse3 (void);
GGML_BACKEND_API int ggml_cpu_has_avx (void);
Expand Down
27 changes: 27 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,33 @@ ggml_add_backend(WebGPU)
ggml_add_backend(zDNN)
ggml_add_backend(OpenCL)

# Optional "ref" CPU backend variant, built as the target ggml-cpu-ref.
# NOTE(review): presumably a plain, unaccelerated build used as a comparison baseline - confirm.
if (GGML_CPU_REF_BACKEND)
    # the variant is only usable when backends are built as loadable libraries
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_REF_BACKEND requires GGML_BACKEND_DL")
    endif()

    # Override the configuration seen by the variant helper below: a dedicated arch tag and
    # every listed optional feature switched off.
    set(GGML_SYSTEM_ARCH "cpu-ref")
    foreach (ref_disabled_feature
            GGML_LLAMAFILE
            GGML_CPU_HBM
            GGML_OPENMP
            GGML_CPU_KLEIDIAI
            GGML_CPU_REPACK
            GGML_ACCELERATE)
        set(${ref_disabled_feature} OFF)
    endforeach()

    ggml_add_cpu_backend_variant(ref)

    # On Arm targets, undefine the compiler's feature macros for the ref target only.
    if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm|aarch64|ARM|AARCH64")
        foreach (ref_arm_feature_macro
                __ARM_NEON
                __ARM_FEATURE_FMA
                __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
                __ARM_FEATURE_DOTPROD
                __ARM_FEATURE_MATMUL_INT8
                __ARM_FEATURE_SVE)
            target_compile_options(ggml-cpu-ref PRIVATE -U${ref_arm_feature_macro})
        endforeach()
    endif()

    # lets the ggml sources know (via #ifdef GGML_USE_CPU_REF) that the ref backend was built
    target_compile_definitions(ggml PRIVATE GGML_USE_CPU_REF)
endif()

foreach (target ggml-base ggml)
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
target_compile_features (${target} PRIVATE c_std_11 cxx_std_17) # don't bump
Expand Down
72 changes: 72 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,4 +605,76 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
if (backend_path) {
ggml_backend_load(backend_path);
}
#ifdef GGML_USE_CPU_REF
ggml_backend_load_best("cpu-ref", silent, dir_path);
#endif
}

void ggml_backend_load_all_variants(const char * name) {
// enumerate all the files that match [lib]ggml-name-*.[so|dll] in the search paths
const fs::path name_path = fs::u8path(name);
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
const fs::path file_extension = backend_filename_extension();

std::vector<fs::path> search_paths;
#ifdef GGML_BACKEND_DIR
search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
#endif
// default search paths: executable directory, current directory
search_paths.push_back(get_executable_path());
search_paths.push_back(fs::current_path());

for (const auto & search_path : search_paths) {
if (!fs::exists(search_path)) {
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
continue;
}
fs::directory_iterator dir_it(search_path, fs::directory_options::skip_permission_denied);
for (const auto & entry : dir_it) {
if (entry.is_regular_file()) {
auto filename = entry.path().filename();
auto ext = entry.path().extension();
if (filename.native().find(file_prefix.native()) == 0 && ext == file_extension) {
fs::path path = search_path / filename;
ggml_backend_reg_t backend = get_reg().load_backend(path, false);
if (backend == nullptr) {
GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(entry.path()).c_str());
}

}
}
}
}
}

void ggml_backend_load_variant(const char * name, const char * variant) {
const fs::path name_path = fs::u8path(name);
const fs::path variant_path = fs::u8path(variant);
const fs::path file_prefix = backend_filename_prefix().native() + name_path.native() + fs::u8path("-").native();
const fs::path target_filename = file_prefix.native() + variant_path.native() + backend_filename_extension().native();

std::vector<fs::path> search_paths;
#ifdef GGML_BACKEND_DIR
search_paths.push_back(fs::u8path(GGML_BACKEND_DIR));
#endif
// default search paths: executable directory, current directory
search_paths.push_back(get_executable_path());
search_paths.push_back(fs::current_path());

for (const auto & search_path : search_paths) {
if (!fs::exists(search_path)) {
GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
continue;
}

fs::path full_path = search_path / target_filename;
if (fs::exists(full_path) && fs::is_regular_file(full_path)) {
ggml_backend_reg_t backend = get_reg().load_backend(full_path, false);
if (backend == nullptr) {
GGML_LOG_ERROR("%s: failed to load backend variant %s\n", __func__, path_str(full_path).c_str());
} else {
return;
}
}
}
}
6 changes: 6 additions & 0 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

# Pass the variant's display name to the sources as the string literal GGML_CPU_VARIANT_NAME:
# "CPU-<tag>" for a tagged variant, plain "CPU" otherwise.
set(cpu_variant_label "CPU")
if (tag_name)
    set(cpu_variant_label "CPU-${tag_name}")
endif()
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_CPU_VARIANT_NAME="${cpu_variant_label}")

if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -3443,6 +3443,14 @@ int ggml_cpu_has_llamafile(void) {
#endif
}

// Report whether this translation unit was compiled with SSE2 enabled.
//
// GCC and Clang define __SSE2__. MSVC never defines it; there SSE2 is implied when targeting x64
// (_M_X64 / _M_AMD64, excluding ARM64EC, which defines the same macros) or 32-bit x86 built with
// /arch:SSE2 or higher (_M_IX86_FP >= 2).
//
// Returns 1 when SSE2 is enabled at compile time, 0 otherwise.
int ggml_cpu_has_sse2(void) {
#if defined(__SSE2__)
    return 1;
#elif defined(_MSC_VER) && !defined(_M_ARM64EC) && \
    (defined(_M_X64) || defined(_M_AMD64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2))
    return 1;
#else
    return 0;
#endif
}

int ggml_cpu_has_sse3(void) {
#if defined(__SSE3__)
return 1;
Expand Down
7 changes: 5 additions & 2 deletions ggml/src/ggml-cpu/ggml-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ struct ggml_backend_cpu_context {
};

// GGML_CPU_VARIANT_NAME is normally supplied by the build ("CPU" or "CPU-<tag>");
// fall back to the untagged name when it is not defined.
#ifndef GGML_CPU_VARIANT_NAME
#define GGML_CPU_VARIANT_NAME "CPU"
#endif

// Returns the name of this CPU backend build, including the variant tag when there is one.
// The `backend` argument is not used.
static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
    return GGML_CPU_VARIANT_NAME;

    GGML_UNUSED(backend);
}
Expand Down Expand Up @@ -337,7 +337,7 @@ struct ggml_backend_cpu_device_context {
};

// GGML_CPU_VARIANT_NAME is normally supplied by the build ("CPU" or "CPU-<tag>");
// fall back to the untagged name when it is not defined.
#ifndef GGML_CPU_VARIANT_NAME
#define GGML_CPU_VARIANT_NAME "CPU"
#endif

// Returns the name of the CPU device for this build, including the variant tag when there is one.
// The `dev` argument is not used.
static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
    return GGML_CPU_VARIANT_NAME;

    GGML_UNUSED(dev);
}
Expand Down Expand Up @@ -516,6 +516,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r
ggml_cpu_init();

std::vector<ggml_backend_feature> features;
if (ggml_cpu_has_sse2()) {
features.push_back({ "SSE2", "1" });
}
if (ggml_cpu_has_sse3()) {
features.push_back({ "SSE3", "1" });
}
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-cpu/repack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1920,7 +1920,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
if ( op->op == GGML_OP_MUL_MAT &&
op->src[0]->buffer &&
(ggml_n_dims(op->src[0]) == 2) &&
ggml_n_dims(op->src[0]) == 2 && ggml_n_dims(op->src[1]) <= 2 &&
op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type() &&
ggml_repack_get_optimal_repack_type(op->src[0])
) {
Expand All @@ -1936,7 +1936,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
// may be possible if Q8_0 packed...
} else if (op->op == GGML_OP_MUL_MAT_ID
&& op->src[0]->buffer
&& (ggml_n_dims(op->src[0]) == 3)
&& ggml_n_dims(op->src[0]) == 3 && ggml_n_dims(op->src[1]) <= 2
&& op->src[0]->buffer->buft == ggml_backend_cpu_repack_buffer_type()
&& ggml_repack_get_optimal_repack_type(op->src[0])
) {
Expand Down
3 changes: 3 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,9 @@ if (NOT LLAMA_SANITIZE_ADDRESS)
endif()
llama_build_and_test(test-gguf.cpp)
llama_build_and_test(test-backend-ops.cpp)
# test-backend-ops also compiles ggml.c directly into the executable, with the private ggml source
# directory on its include path and the definitions ggml.c is built with.
# NOTE(review): presumably so the test can reach ggml internals - confirm.
target_sources(test-backend-ops
    PRIVATE
        ${PROJECT_SOURCE_DIR}/ggml/src/ggml.c
)
target_include_directories(test-backend-ops
    PRIVATE
        ${PROJECT_SOURCE_DIR}/ggml/src
)
target_compile_definitions(test-backend-ops
    PRIVATE
        GGML_BUILD
        GGML_VERSION=\"${GGML_VERSION}\"
        GGML_COMMIT=\"${GGML_COMMIT}\"
)

llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
llama_build_and_test(test-autorelease.cpp LABEL "model")
Expand Down
Loading
Loading