# Skip to content
#
# Hardware Long Tests #150
#
# Hardware Long Tests
#
# Hardware Long Tests #150
#
# Workflow file for this run

name: Hardware Long Tests

# Triggers:
#   - schedule: run the long tests once nightly, at 00:00 (GitHub evaluates
#     cron schedules in UTC).
#   - workflow_dispatch: allow the workflow to be started manually.
on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

# Runs that share a concurrency group cancel any run of that group that is
# still in progress.
# NOTE(review): neither trigger above is a pull_request event, so
# github.event.pull_request.number is presumably always empty here and the
# group key falls back to the commit SHA - confirm this is intended.
concurrency:
  group: '${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}'
  cancel-in-progress: true
jobs:
  # Calls the reusable build-fw workflow. metal-test downloads the artifact
  # whose name this job exposes as the `combined-fwbundle-artifact` output.
  build-fw-artifact:
    uses: ./.github/workflows/build-fw.yml
    secrets:
      SIGNATURE_KEY: ${{ secrets.SIGNATURE_KEY }}

  # Runs scripts/ci/run-stress.sh once per board, on the runner label listed
  # for that board, and uploads the twister output as an artifact.
  e2e-stress-test:
    strategy:
      # A failure on one board must not cancel the other boards' runs.
      fail-fast: false
      matrix:
        config:
          - board: p100a
            runs-on:
              - p100a-jtag
          - board: p150a
            runs-on:
              - p150a-jtag
          - board: p300a
            runs-on:
              - p300a-jtag
    runs-on: ${{ matrix.config.runs-on }}
    container:
      image: ghcr.io/tenstorrent/tt-zephyr-platforms/ci-image:v18.12.0-rc1
      volumes:
        - /dev/hugepages-1G:/dev/hugepages-1G
      options: '--device /dev/tenstorrent --device /dev/bus/usb --privileged'
    steps:
      - name: Cleanup State
        if: ${{ always() }}
        run: |
          # Print state of current directory
          ls -la
          # Clean up any files left over from previous steps
          # NOTE(review): the `*` glob does not match dotfiles, so hidden
          # entries survive this cleanup - confirm that is intended.
          rm -rf *
      - name: Checkout
        uses: actions/checkout@v4.2.2
        with:
          path: tt-zephyr-platforms
      - name: Prepare Container
        uses: ./tt-zephyr-platforms/.github/workflows/prepare-container
        with:
          app-path: tt-zephyr-platforms
      - name: run-e2e-stress-test
        run: |
          tt-zephyr-platforms/scripts/ci/run-stress.sh ${{ matrix.config.board }}
      # Uploaded even when the stress test failed, so logs are always kept.
      - name: Upload Stress Test results
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: Stress test results (${{ matrix.config.board }})
          # Needed because `.config` below is a hidden file.
          include-hidden-files: true
          path: |
            zephyr/twister-*e2e*/**/device.log
            zephyr/twister-*e2e*/**/handler.log
            zephyr/twister-*e2e*/**/twister_harness.log
            zephyr/twister-*e2e*/**/zephyr.dts
            zephyr/twister-*e2e*/**/.config
            zephyr/twister-*e2e*/**/*.map
            zephyr/twister-*e2e*/**/zephyr.elf
            zephyr/twister-*e2e*/**/*.lst
            zephyr/twister-*e2e*/twister.log
            zephyr/twister-*e2e*/twister.json
            zephyr/twister-*e2e*/**/update.fwbundle
            zephyr/twister-*e2e*/**/recording.csv
      # Diagnostics only; runs when an earlier step of this job failed.
      - name: Print RTT logs
        if: ${{ failure() }}
        working-directory: tt-zephyr-platforms
        run: |
          # The run shell exits on the first failing command, so every dump is
          # made best-effort: one failing script must not hide the others.
          echo "DMC RTT logs:"
          python3 ./scripts/dmc_rtt.py -n \
            || echo "dmc_rtt.py failed; continuing with remaining dumps"
          echo "SMC RTT logs:"
          python3 ./scripts/smc_console.py --rtt -n \
            || echo "smc_console.py failed; continuing with remaining dumps"
          echo "SMC state dump:"
          python3 ./scripts/dump_smc_state.py \
            || echo "dump_smc_state.py failed"

  # Temporarily moved from hardware-smoke.yml until metal tests occupy less
  # time for CI on each PR.
  # Flashes the firmware bundle produced by build-fw-artifact onto each board
  # and then runs the upstream test script shipped in the container image.
  metal-test:
    needs: build-fw-artifact
    strategy:
      # A failure on one board must not cancel the other boards' runs.
      fail-fast: false
      matrix:
        config:
          - board: p300a
            runs-on:
              - p300a-jtag
          - board: p100a
            runs-on:
              - p100a-jtag
          - board: p150a
            runs-on:
              - p150a-jtag
    runs-on: ${{ matrix.config.runs-on }}
    container:
      image: ghcr.io/tenstorrent/tt-metal/upstream-tests-bh:v0.62.0-rc33-466-gf3b2c1316e
      volumes:
        - /dev/hugepages-1G:/dev/hugepages-1G
        - /dev/hugepages:/dev/hugepages
      options: '--device /dev/tenstorrent --device /dev/bus/usb --privileged'
    env:
      ARCH_NAME: blackhole
    defaults:
      run:
        shell: bash
        working-directory: /home/user/tt-metal/
    steps:
      - id: mkdir-home-user-tt-metal
        # Run from `/` so this step does not depend on the job-wide default
        # working directory, which is the directory it is about to create.
        working-directory: /
        run: |
          mkdir -p /home/user/tt-metal
      - name: Download the latest firmware bundle
        uses: actions/download-artifact@v4
        with:
          name: ${{ needs.build-fw-artifact.outputs.combined-fwbundle-artifact }}
          path: /home/user/tt-metal
      - name: Run the rescan-pcie.sh script
        run: |
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page, so a bad download is never sourced as a script.
          curl --fail --silent --show-error --location \
            --output /tmp/rescan-pcie.sh \
            "https://raw.githubusercontent.com/tenstorrent/tt-zephyr-platforms/${{ github.sha }}/scripts/rescan-pcie.sh"
          source /tmp/rescan-pcie.sh
      - name: Flash the firmware
        run: |
          sudo chmod -R a+rwX $HOME/.cargo $HOME/.cache
          python -m venv .env
          source .env/bin/activate
          pip install git+https://github.com/tenstorrent/tt-flash.git@v3.4.2
          tt-flash --fw-tar /home/user/tt-metal/fw_pack-*.fwbundle --force
      - name: Run Container Test
        run: |
          sh -c "dockerfile/upstream_test_images/run_upstream_tests_vanilla.sh blackhole_no_models"
      # Always remove the downloaded bundle, whether or not the tests passed.
      - name: cleanup
        if: ${{ always() }}
        run: |
          # Clean out metal
          rm -f /home/user/tt-metal/fw_pack-*.fwbundle