157 changes: 157 additions & 0 deletions .github/workflows/cpp_server_build_test_release.yml
@@ -1324,6 +1324,163 @@ jobs:
        with:
          artifact-name: server-logs-apikey-windows-latest

  # ========================================================================
  # TEST ROCM STABLE CHANNEL - Verify stable channel on Windows hosted runner
  # ========================================================================

  test-rocm-stable-channel:
    name: Test ROCm Stable Channel (Windows)
    runs-on: windows-latest
    needs:
      - build-lemonade-server-installer
    env:
      LEMONADE_CI_MODE: "True"
      PYTHONIOENCODING: utf-8
      GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup (Windows)
        shell: powershell
        run: |
          $cwd = (Get-Item .).FullName
          echo "HF_HOME=$cwd\hf-cache" >> $Env:GITHUB_ENV
          echo "LEMONADE_INSTALL_PATH=$cwd\lemonade_server_install" >> $Env:GITHUB_ENV

      - name: Install Lemonade Server (Windows)
        uses: ./.github/actions/install-lemonade-server-msi
        with:
          install-path: ${{ env.LEMONADE_INSTALL_PATH }}

      - name: Set paths (Windows)
        shell: powershell
        run: |
          echo "VENV_PYTHON=.venv/Scripts/python.exe" >> $Env:GITHUB_ENV
          echo "SERVER_BINARY=$Env:LEMONADE_INSTALL_PATH\bin\lemonade-server.exe" >> $Env:GITHUB_ENV

      - name: Setup Python and virtual environment
        uses: ./.github/actions/setup-venv
        with:
          venv-name: '.venv'
          python-version: '3.10'
          requirements-file: 'test/requirements.txt'

      - name: Set ROCm channel to stable
        shell: bash
        run: |
          set -e
          echo "Setting rocm_channel to stable in config.json"
          "$VENV_PYTHON" -c "
          import json
          from pathlib import Path

          cache_dir = Path.home() / '.cache' / 'lemonade'
          config_file = cache_dir / 'config.json'

          # Create cache dir if needed
          cache_dir.mkdir(parents=True, exist_ok=True)

          # Load or create config
          if config_file.exists():
              with open(config_file, 'r') as f:
                  config = json.load(f)
          else:
              config = {}

          # Set rocm_channel to stable
          config['rocm_channel'] = 'stable'

          # Save config
          with open(config_file, 'w') as f:
              json.dump(config, f, indent=2)

          print('Config updated: rocm_channel = stable')
          "

      - name: Verify ROCm stable channel configuration
        shell: bash
        run: |
          set -e
          echo "Verifying rocm_channel setting..."
          "$VENV_PYTHON" -c "
          import json
          import sys
          from pathlib import Path

          config_file = Path.home() / '.cache' / 'lemonade' / 'config.json'

          if not config_file.exists():
              print('ERROR: config.json not found')
              sys.exit(1)

          with open(config_file, 'r') as f:
              config = json.load(f)

          channel = config.get('rocm_channel', 'NOT_SET')
          print(f'rocm_channel = {channel}')

          if channel != 'stable':
              print(f'ERROR: Expected rocm_channel=stable, got {channel}')
              sys.exit(1)

          print('SUCCESS: rocm_channel is set to stable')
          "

      - name: Test channel switching with lemonade CLI
        shell: bash
        run: |
          set -e
          echo "Setting rocm_channel to stable via lemonade CLI..."

          # Use lemonade config set to change the channel
          "$SERVER_BINARY" config set rocm_channel=stable

          echo "Verifying config was updated..."
          "$VENV_PYTHON" -c "
          import json
          import sys
          from pathlib import Path

          config_file = Path.home() / '.cache' / 'lemonade' / 'config.json'

          if not config_file.exists():
              print('ERROR: config.json not found')
              sys.exit(1)

          with open(config_file, 'r') as f:
              config = json.load(f)

          channel = config.get('rocm_channel', 'NOT_SET')
          print(f'rocm_channel = {channel}')

          if channel != 'stable':
              print(f'ERROR: Expected rocm_channel=stable, got {channel}')
              sys.exit(1)

          print('SUCCESS: rocm_channel set to stable via CLI')
          "

      - name: Test recipes command shows rocm backend
        shell: bash
        run: |
          set -e
          echo "Testing recipes command..."
          "$SERVER_BINARY" recipes | tee recipes_output.txt

          # Channel-specific names must not leak into the output; the
          # backend should be listed simply as "rocm"
          if grep -q "rocm-stable" recipes_output.txt || grep -q "rocm-preview" recipes_output.txt; then
            echo "ERROR: Found rocm-stable or rocm-preview in recipes output (should only show 'rocm')"
            cat recipes_output.txt
            exit 1
          fi

          echo "SUCCESS: Recipes output looks correct"

      - name: Capture and upload server logs
        if: always()
        uses: ./.github/actions/capture-server-logs
        with:
          artifact-name: server-logs-rocm-stable-channel

  # ========================================================================
  # RELEASE JOB - Add artifacts to GitHub release
  # ========================================================================
181 changes: 181 additions & 0 deletions docs/llamacpp.md
@@ -0,0 +1,181 @@
# llama.cpp Backend Options

Lemonade uses [llama.cpp](https://github.com/ggerganov/llama.cpp) as its primary LLM inference backend, supporting multiple hardware acceleration options. This document explains the available backends and how to choose between them.

## Available Backends

### CPU
- **Platform**: Windows, Linux, macOS
- **Hardware**: All x86_64 processors (and Apple Silicon on macOS)
- **Use Case**: Universal fallback, no GPU required
- **Performance**: Slowest option, suitable for small models or testing
- **Installation**: Automatically available via upstream llama.cpp releases

### Vulkan
- **Platform**: Windows, Linux
- **Hardware**: AMD GPUs (iGPU and dGPU), NVIDIA GPUs, Intel GPUs
- **Use Case**: Cross-vendor GPU acceleration
- **Performance**: Good performance across all GPU vendors
- **Installation**: Automatically available via upstream llama.cpp releases
- **Notes**: Recommended for most GPU users

### ROCm
- **Platform**: Windows, Linux
- **Hardware**: AMD Radeon RX 6000/7000/9000 series (RDNA2/RDNA3/RDNA4), AMD Ryzen AI iGPUs (Strix Point/Halo)
- **Use Case**: AMD GPU-optimized inference
- **Performance**: Optimized for AMD hardware, may outperform Vulkan on supported GPUs
- **Channel Options**:
  - **Preview** (default): Custom builds with latest optimizations from lemonade-sdk
  - **Stable**: Upstream llama.cpp releases with AMD ROCm support
- **Installation**: Varies by channel (see below)

### Metal
- **Platform**: macOS only
- **Hardware**: Apple Silicon (M1/M2/M3/M4) and Intel Macs with Metal support
- **Use Case**: macOS GPU acceleration
- **Performance**: Optimized for Apple Silicon
- **Installation**: Automatically available via upstream llama.cpp releases

### System
- **Platform**: Linux only
- **Hardware**: Depends on system-installed llama-server binary
- **Use Case**: Advanced users with custom llama.cpp builds
- **Performance**: Depends on build configuration
- **Installation**: Requires manual installation of `llama-server` in system PATH
- **Notes**: Not enabled by default; set `LEMONADE_LLAMACPP_PREFER_SYSTEM=true` in your configuration to enable it (a sketch follows below)
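
A minimal sketch of opting in, assuming the setting is honored as an environment variable (if your build reads it from `config.json` instead, set the equivalent key there):

```bash
# llama-server must already be installed and discoverable on PATH
which llama-server

# Opt in to the system binary (assumed environment-variable form of the setting)
export LEMONADE_LLAMACPP_PREFER_SYSTEM=true
```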

## ROCm Channel Configuration

The ROCm backend supports two channels to balance stability and performance:

### Preview Channel (Default)
```json
{
  "llamacpp": {
    "rocm_channel": "preview"
  }
}
```
- **Source**: Custom builds from [lemonade-sdk/llamacpp-rocm](https://github.com/lemonade-sdk/llamacpp-rocm)
- **Binaries**: Architecture-specific builds (gfx1150, gfx1151, gfx103X, gfx110X, gfx120X)
- **Updates**: Frequent updates with latest optimizations and fixes
- **Platform**: Windows and Linux
- **Runtime**: Windows bundles ROCm runtime; Linux uses bundled runtime or system `/opt/rocm`
- **Best For**: Users who want the latest performance optimizations

### Stable Channel
```json
{
  "llamacpp": {
    "rocm_channel": "stable"
  }
}
```
- **Source**: Upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) releases
- **Binaries**:
  - **Windows**: Self-contained HIP binaries (no separate runtime needed)
  - **Linux**: Binaries built against the ROCm 7.2 runtime
- **Updates**: Follows upstream llama.cpp release cycle
- **Platform**: Windows and Linux
- **Runtime**:
  - Windows: Self-contained, no runtime installation required
  - Linux: Downloads the AMD ROCm 7.2.1 runtime if not present at `/opt/rocm` (a quick check is sketched after this list)
- **Best For**: Users who prefer stable, tested releases aligned with upstream
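
Since the stable channel only downloads a runtime on Linux when `/opt/rocm` is absent, a quick check (a sketch based on the runtime notes above) shows which path your machine will take:

```bash
# If this directory exists, the stable channel uses the system runtime;
# otherwise the ROCm 7.2.1 runtime is downloaded during backend install.
if [ -d /opt/rocm ]; then
    echo "System ROCm found at /opt/rocm; it will be used."
else
    echo "No system ROCm; the runtime will be downloaded."
fi
```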

### Changing Channels

To switch between channels, update your `config.json`:

```json
{
  "llamacpp": {
    "rocm_channel": "stable"
  }
}
```

Or use the Lemonade CLI:
```bash
# Switch to stable channel
lemonade config set llamacpp.rocm_channel stable

# Switch back to preview channel
lemonade config set llamacpp.rocm_channel preview
```

After changing channels, you'll need to reinstall the ROCm backend:
```bash
lemonade backend install llamacpp rocm
```
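
Putting the pieces together, the whole switch can be scripted from the commands above. A sketch, assuming the `lemonade` CLI is on your PATH (the final `recipes` listing mirrors the check used by the CI test in this PR):

```bash
#!/usr/bin/env bash
set -e

# 1. Point the ROCm backend at the stable channel
lemonade config set llamacpp.rocm_channel stable

# 2. Reinstall the backend so the new channel's binaries are fetched
lemonade backend install llamacpp rocm

# 3. Sanity check: the backend should be listed simply as "rocm",
#    never as a channel-specific name like "rocm-stable" or "rocm-preview"
lemonade recipes
```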

## Choosing the Right Backend

### Decision Tree

1. **Do you have an NVIDIA or Intel GPU?**
   - Use **Vulkan**

2. **Do you have an AMD GPU?**
   - **For Radeon RX 6000/7000 or Ryzen AI iGPU**:
     - Try **ROCm** first for best performance
     - Fall back to **Vulkan** if you encounter issues
   - **For older AMD GPUs (RX 5000 and earlier)**:
     - Use **Vulkan** (ROCm not supported)

3. **Do you have Apple Silicon?**
   - Use **Metal**

4. **No GPU or unsupported GPU?**
   - Use **CPU**

### ROCm Channel Selection

- **Use Preview** if you:
  - Want the best performance on AMD hardware
  - Are comfortable with frequent updates
  - Are testing new models or features

- **Use Stable** if you:
  - Prefer stability over latest features
  - Want upstream llama.cpp compatibility
  - Are deploying in production

## Platform Specifics

### Linux
- All backends supported (CPU, Vulkan, ROCm, System)
- ROCm requires compatible AMD GPU (see above)
- System backend requires manual llama-server installation

### Windows
- Supported: CPU, Vulkan, ROCm
- ROCm requires compatible AMD GPU
- No system backend support

### macOS
- Supported: CPU, Metal
- Metal recommended for all Macs with Metal support

## Troubleshooting

### ROCm Backend Not Available
- Verify your AMD GPU is RDNA2 or newer (RX 6000+ or Ryzen AI iGPU)
- On Linux with Strix Halo (gfx1151), ensure kernel 6.13+ with CWSR support
- Check the `/api/v1/system-info` endpoint for backend availability (example below)
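
For example, with a local server running (a sketch; 8000 is an assumed port, substitute whatever your server listens on):

```bash
# Pretty-print the reported system info; look for the rocm backend entry
curl -s http://localhost:8000/api/v1/system-info | python -m json.tool
```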

### Performance Issues
- Try switching between Vulkan and ROCm to compare
- For ROCm, try both preview and stable channels
- Check VRAM usage - some models may be too large for your GPU

### Installation Failures
- Ensure stable internet connection for downloads
- Check disk space in Lemonade cache directory
- For ROCm on Linux, verify `/opt/rocm` permissions if using system runtime

## Additional Resources

- [Lemonade CLI Documentation](lemonade-cli.md)
- [llama.cpp GitHub](https://github.com/ggerganov/llama.cpp)
- [AMD ROCm Documentation](https://rocm.docs.amd.com/)
- [Vulkan Documentation](https://www.vulkan.org/)
15 changes: 15 additions & 0 deletions src/cpp/include/lemon/backends/backend_utils.h
@@ -71,6 +71,21 @@ namespace lemon::backends {
/** Get the latest version number for the given recipe/backend */
static std::string get_backend_version(const std::string& recipe, const std::string& backend);

/** Check if ROCm libraries are installed system-wide (Linux only) */
static bool is_rocm_installed_system_wide();

/** Get TheRock installation directory for a specific architecture and version */
static std::string get_therock_install_dir(const std::string& arch, const std::string& version);

/** Download and install TheRock ROCm tarball for the specified architecture (Linux only) */
static void install_therock(const std::string& arch, const std::string& version);

/** Clean up old TheRock versions, keeping only the specified version */
static void cleanup_old_therock_versions(const std::string& current_version);

/** Get TheRock lib directory path if available, or empty string if not needed */
static std::string get_therock_lib_path(const std::string& rocm_arch);

/** Get the path to the backend's binary. Gives precedence to the path set through environment variables, if set. Throws if not found. */
static std::string get_backend_binary_path(const BackendSpec& spec, const std::string& backend);

1 change: 1 addition & 0 deletions src/cpp/include/lemon/runtime_config.h
@@ -35,6 +35,7 @@ class RuntimeConfig {
bool offline() const;
bool disable_model_filtering() const;
bool enable_dgpu_gtt() const;
std::string rocm_channel() const;

// Backend settings (nested)
json backend_config(const std::string& backend_name) const;