diff --git a/.github/workflows/llama-perf.yml b/.github/workflows/llama-perf.yml index 1a1306f4..e0fdb2bf 100644 --- a/.github/workflows/llama-perf.yml +++ b/.github/workflows/llama-perf.yml @@ -231,7 +231,7 @@ jobs: wget -O /usr/local/bin/bazel "$BIN_URL/v1.26.0/bazelisk-linux-amd64" chmod +x /usr/local/bin/bazel bazel version - apt update && apt install -y libdw1 libglib2.0-0 + apt update && apt install -y libglib2.0-0 export TRAIN_SETTING="${{ matrix.model-name }}" cd /workspace && bash -i bazel_run_anynode.sh 2>&1 | tee logs.log tail -n 25 logs.log > training_summary.txt' diff --git a/.github/workflows/rocm-perf.yml b/.github/workflows/rocm-perf.yml index 3d2e9fbf..586838b6 100644 --- a/.github/workflows/rocm-perf.yml +++ b/.github/workflows/rocm-perf.yml @@ -91,8 +91,6 @@ jobs: - name: Run MaxText training and save logs run: | - docker exec maxtext_container bash -c \ - "apt update && apt install -y libdw1" for config in \ MaxText/configs/models/gpu/llama2_7b_rocm.yml \ MaxText/configs/models/gpu/gemma_2b_rocm.yml \ diff --git a/docker/Dockerfile.jax-ubu22 b/docker/Dockerfile.jax-ubu22 index f14fe059..97c8aedc 100644 --- a/docker/Dockerfile.jax-ubu22 +++ b/docker/Dockerfile.jax-ubu22 @@ -15,6 +15,9 @@ RUN --mount=type=cache,target=/var/cache/apt \ libbz2-dev liblzma-dev \ libncursesw5-dev xz-utils \ tk-dev uuid-dev \ + # workaround for ROCm 7.0.x missing dep + # Remove once ROCm/ROCm-docker#158 has landed + libdw1 \ && apt-get clean && rm -rf /var/lib/apt/lists/* # Download and unpack python-v3.11 diff --git a/docker/Dockerfile.jax-ubu24 b/docker/Dockerfile.jax-ubu24 index 23ee7824..343326cd 100644 --- a/docker/Dockerfile.jax-ubu24 +++ b/docker/Dockerfile.jax-ubu24 @@ -9,6 +9,9 @@ RUN --mount=type=cache,target=/var/cache/apt \ RUN apt-get update && apt-get install -y \ sqlite3 libsqlite3-dev \ libbz2-dev \ + # workaround for ROCm 7.0.x missing dep + # Remove once ROCm/ROCm-docker#158 has landed + libdw1t64 \ && rm -rf /var/lib/apt/lists/* # Add target file to help 
determine which device(s) to build for