Build llama.cpp with CUDA #82
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
# Workflow: builds llama.cpp against several CUDA toolkits and publishes the
# resulting tarballs as a release of this repository.
name: Build llama.cpp with CUDA

on:
  # Scheduled trigger: once a day at midnight (00:00) UTC.
  schedule:
    - cron: '0 0 * * *'

  # Manual trigger from the Actions tab.
  workflow_dispatch:
    inputs:
      # When true, the build runs even if the latest upstream release has
      # already been built (see the check-release job).
      force_build:
        description: 'Force build even if no new release'
        required: false
        type: boolean
        default: false

jobs:
  # ---------------------------------------------------------------------------
  # check-release: looks up the latest upstream llama.cpp release and decides
  # whether it still has to be built. A release counts as "already built" when
  # this repository has a tag with the same name.
  #
  # Outputs:
  #   should_build  'true' / 'false'
  #   release_tag   upstream release tag name
  #   release_hash  commit SHA that the upstream tag resolves to
  # ---------------------------------------------------------------------------
  check-release:
    runs-on: ubuntu-latest
    outputs:
      should_build: ${{ steps.check.outputs.should_build }}
      release_tag: ${{ steps.check.outputs.release_tag }}
      release_hash: ${{ steps.check.outputs.release_hash }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full fetch so that the tags of this repository are available
          # locally for the "already built" check below.
          fetch-depth: 0

      - name: Check for new llama.cpp release
        id: check
        env:
          # Handed over via the environment instead of being templated into
          # the script text, so the values can never be parsed as shell code.
          FORCE_BUILD: ${{ github.event.inputs.force_build }}
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          api="https://api.github.com/repos/ggml-org/llama.cpp"

          # --fail makes HTTP errors (e.g. rate limiting) abort the step
          # instead of feeding an error document to jq.
          LATEST_RELEASE=$(curl -fsSL \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            "${api}/releases/latest" | jq -r '.tag_name // empty')
          if [ -z "$LATEST_RELEASE" ]; then
            echo "::error::Could not determine the latest llama.cpp release"
            exit 1
          fi

          # Resolve the tag to its commit SHA. The release's target_commitish
          # field may just be a branch name, which does not pin the source.
          RELEASE_HASH=$(curl -fsSL \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            "${api}/commits/${LATEST_RELEASE}" | jq -r '.sha // empty')
          if [ -z "$RELEASE_HASH" ]; then
            echo "::error::Could not resolve ${LATEST_RELEASE} to a commit"
            exit 1
          fi

          echo "Latest llama.cpp release: $LATEST_RELEASE"
          echo "Release hash: $RELEASE_HASH"

          # Check if we've already built this release. rev-parse does an
          # exact ref lookup, so the tag name is never treated as a pattern.
          if git rev-parse -q --verify "refs/tags/${LATEST_RELEASE}" >/dev/null; then
            echo "Release $LATEST_RELEASE already built"
            if [ "${FORCE_BUILD:-false}" = "true" ]; then
              echo "Force build enabled, building anyway"
              echo "should_build=true" >> "$GITHUB_OUTPUT"
            else
              echo "should_build=false" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "New release detected: $LATEST_RELEASE"
            echo "should_build=true" >> "$GITHUB_OUTPUT"
          fi

          echo "release_tag=$LATEST_RELEASE" >> "$GITHUB_OUTPUT"
          echo "release_hash=$RELEASE_HASH" >> "$GITHUB_OUTPUT"

  # ---------------------------------------------------------------------------
  # build: compiles llama.cpp inside an nvidia/cuda "devel" container, once
  # per CUDA version in the matrix, and uploads one tarball per version.
  # Runs only when check-release reported should_build == 'true'.
  # ---------------------------------------------------------------------------
  build:
    needs: check-release
    if: needs.check-release.outputs.should_build == 'true'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        cuda_version: ['12.4.1', '12.6.3', '12.8.1', '12.9.1', '13.0.1']
        # Each include entry matches one cuda_version above and attaches the
        # short version, the container image tag and the list passed to
        # CMAKE_CUDA_ARCHITECTURES.
        include:
          - cuda_version: '12.4.1'
            cuda_version_short: '12.4'
            cuda_tag: '12.4.1-cudnn-devel-ubuntu22.04'
            architectures: '61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual'
          - cuda_version: '12.6.3'
            cuda_version_short: '12.6'
            cuda_tag: '12.6.3-cudnn-devel-ubuntu22.04'
            architectures: '61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual'
          - cuda_version: '12.8.1'
            cuda_version_short: '12.8'
            cuda_tag: '12.8.1-cudnn-devel-ubuntu22.04'
            architectures: '61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual;120-virtual'
          - cuda_version: '12.9.1'
            cuda_version_short: '12.9'
            cuda_tag: '12.9.1-cudnn-devel-ubuntu22.04'
            architectures: '61-virtual;70-virtual;75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual;120-virtual'
          - cuda_version: '13.0.1'
            cuda_version_short: '13.0'
            cuda_tag: '13.0.1-cudnn-devel-ubuntu22.04'
            architectures: '75-virtual;80-virtual;86-virtual;89-virtual;90-virtual;100-virtual;120-virtual'
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Frees disk space on the runner before the CUDA image is pulled.
      # Every removal is best-effort (|| true / rm -f).
      - name: Maximize build space
        run: |
          echo "=== Initial Disk Space ==="
          df -h

          echo "=== Removing unnecessary packages ==="
          sudo apt-get remove -y '^aspnetcore-.*' || true
          sudo apt-get remove -y '^dotnet-.*' --fix-missing || true
          sudo apt-get remove -y '^llvm-.*' --fix-missing || true
          sudo apt-get remove -y 'php.*' --fix-missing || true
          sudo apt-get remove -y '^mongodb-.*' --fix-missing || true
          sudo apt-get remove -y '^mysql-.*' --fix-missing || true
          sudo apt-get remove -y azure-cli google-chrome-stable firefox powershell mono-devel libgl1-mesa-dri --fix-missing || true
          sudo apt-get autoremove -y || true
          sudo apt-get clean || true

          echo "=== Removing large directories ==="
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo rm -rf /usr/local/share/boost
          sudo rm -rf /usr/share/swift
          sudo rm -rf /usr/local/.ghcup
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

          # Clean docker system
          docker system prune -af || true

          echo "=== After Cleanup ==="
          df -h

      - name: Build llama.cpp with CUDA
        env:
          CUDA_IMAGE: nvidia/cuda:${{ matrix.cuda_tag }}
          # TARGET_* / RELEASE_* are forwarded into the container with
          # `docker run -e`. The TARGET_ prefix avoids clashing with any
          # variable the image may define itself.
          TARGET_CUDA_VERSION: ${{ matrix.cuda_version }}
          TARGET_CUDA_SHORT: ${{ matrix.cuda_version_short }}
          TARGET_CUDA_ARCHS: ${{ matrix.architectures }}
          RELEASE_TAG: ${{ needs.check-release.outputs.release_tag }}
          RELEASE_HASH: ${{ needs.check-release.outputs.release_hash }}
        run: |
          set -euo pipefail

          # Pull Docker image
          docker pull "$CUDA_IMAGE"

          # The container script is written to a file through a *quoted*
          # heredoc: the runner's shell performs no expansion, globbing or
          # quote removal on it, so every $VAR, $(...) and "..." below is
          # evaluated inside the container exactly as written.
          cat > "$RUNNER_TEMP/build-llama.sh" <<'CONTAINER_SCRIPT'
          set -euo pipefail

          echo '=== Installing minimal dependencies ==='
          apt-get update -qq
          apt-get install -y --no-install-recommends git cmake ninja-build build-essential libcurl4-openssl-dev ca-certificates
          apt-get clean
          rm -rf /var/lib/apt/lists/*

          echo '=== Cloning llama.cpp ==='
          cd /workspace
          rm -rf llama.cpp
          if ! git clone --depth 1 --branch "$RELEASE_TAG" https://github.com/ggml-org/llama.cpp.git; then
            # Fallback: full clone, then check out the pinned commit. Remove
            # whatever the failed shallow clone may have left behind first.
            rm -rf llama.cpp
            git clone https://github.com/ggml-org/llama.cpp.git
            git -C llama.cpp checkout "$RELEASE_HASH"
          fi
          cd llama.cpp

          echo '=== Configuring build with Ninja ==='
          # No GPU driver is present in the build container; link against the
          # libcuda stub shipped with the toolkit instead.
          export LIBRARY_PATH="/usr/local/cuda/lib64/stubs${LIBRARY_PATH:+:$LIBRARY_PATH}"
          ln -sf /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
          cmake -B build -S . \
            -G Ninja \
            -DGGML_CUDA=ON \
            "-DCMAKE_CUDA_ARCHITECTURES=${TARGET_CUDA_ARCHS}" \
            -DCMAKE_BUILD_TYPE=Release \
            -DBUILD_SHARED_LIBS=ON \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            '-DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs' \
            '-DCMAKE_SHARED_LINKER_FLAGS=-Wl,-rpath-link,/usr/local/cuda/lib64/stubs'

          echo '=== Building with Ninja (parallel: all cores) ==='
          cmake --build build --config Release -j"$(nproc)"

          echo '=== Copying binaries ==='
          cd /workspace
          out="binaries/cuda-${TARGET_CUDA_SHORT}"
          mkdir -p "$out"
          # Copy everything from build/bin
          cp -r llama.cpp/build/bin/* "$out"/
          # Strip binaries to reduce size (executables only, not .so files)
          find "$out"/ -type f -executable ! -name '*.so*' -exec strip {} \; 2>/dev/null || true

          echo '=== Creating version info ==='
          {
            echo "llama.cpp version: ${RELEASE_TAG}"
            echo "CUDA version: ${TARGET_CUDA_VERSION}"
            echo "Architectures: ${TARGET_CUDA_ARCHS}"
            echo "Build date: $(date -u +%Y-%m-%d)"
            echo "Build hash: ${RELEASE_HASH}"
          } > "$out/VERSION.txt"

          echo '=== Build complete ==='
          ls -lh "$out"/

          echo '=== Cleaning up build directory ==='
          rm -rf llama.cpp
          CONTAINER_SCRIPT

          # Run build in container
          docker run --rm \
            -v "$PWD:/workspace" \
            -v "$RUNNER_TEMP/build-llama.sh:/build-llama.sh:ro" \
            -e TARGET_CUDA_VERSION \
            -e TARGET_CUDA_SHORT \
            -e TARGET_CUDA_ARCHS \
            -e RELEASE_TAG \
            -e RELEASE_HASH \
            "$CUDA_IMAGE" \
            bash /build-llama.sh

          # Remove Docker image to free space
          docker rmi "$CUDA_IMAGE" || true

          # Fix permissions for files created by root in container
          sudo chown -R "$(id -u):$(id -g)" "$PWD"

          echo "=== Final disk usage ==="
          df -h

      - name: Create tarball
        env:
          RELEASE_TAG: ${{ needs.check-release.outputs.release_tag }}
          TARGET_CUDA_SHORT: ${{ matrix.cuda_version_short }}
        run: |
          cd binaries
          # The archive contains the directory cuda-<short version>/.
          tar -czf "llama.cpp-${RELEASE_TAG}-cuda-${TARGET_CUDA_SHORT}.tar.gz" "cuda-${TARGET_CUDA_SHORT}"
          ls -lh *.tar.gz

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama.cpp-cuda-${{ matrix.cuda_version_short }}
          path: binaries/*.tar.gz
          # A build leg without a tarball is a failure, not a warning.
          if-no-files-found: error
          retention-days: 1

  # ---------------------------------------------------------------------------
  # release: gathers the tarballs of all matrix legs and publishes them as a
  # release of this repository, tagged with the upstream release tag. That tag
  # is what check-release later uses to detect "already built".
  # ---------------------------------------------------------------------------
  release:
    needs: [check-release, build]
    runs-on: ubuntu-latest
    permissions:
      # Needed to create the tag and the release.
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: Prepare release assets
        run: |
          mkdir -p release-assets
          find artifacts -name "*.tar.gz" -exec cp {} release-assets/ \;
          ls -lh release-assets/

      - name: Create Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ needs.check-release.outputs.release_tag }}
          name: llama.cpp ${{ needs.check-release.outputs.release_tag }} with CUDA
          body: |
            # llama.cpp ${{ needs.check-release.outputs.release_tag }} with CUDA Support

            Pre-built binaries of llama.cpp with CUDA support for multiple CUDA versions.

            **Source:** https://github.com/ggml-org/llama.cpp/releases/tag/${{ needs.check-release.outputs.release_tag }}

            **Commit:** ${{ needs.check-release.outputs.release_hash }}

            ## CUDA Versions

            - CUDA 12.4 - Architectures: 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, 9.0
            - CUDA 12.6 - Architectures: 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, 9.0
            - CUDA 12.8 - Architectures: 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
            - CUDA 12.9 - Architectures: 6.1, 7.0, 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0
            - CUDA 13.0 - Architectures: 7.5, 8.0, 8.6, 8.9, 9.0, 10.0, 12.0

            ## Architecture Reference

            - 6.1: Titan XP, Tesla P40, GTX 10xx
            - 7.0: Tesla V100
            - 7.5: Tesla T4, RTX 20xx series, Quadro RTX
            - 8.0: A100
            - 8.6: RTX 3000 series
            - 8.9: RTX 4000 series, L4, L40
            - 9.0: H100, H200
            - 10.0: B200
            - 12.0: RTX Pro series, RTX 50xx

            ## Usage

            Download the appropriate tarball for your CUDA version and extract it.
            Each tarball unpacks into a `cuda-<version>` directory:

            ```bash
            tar -xzf llama.cpp-${{ needs.check-release.outputs.release_tag }}-cuda-12.8.tar.gz
            ./cuda-12.8/llama-cli --help
            ```
          files: release-assets/*
          # Refuse to publish a release that has no tarballs attached.
          fail_on_unmatched_files: true
          draft: false
          prerelease: false