# Weekly Tests #99
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name shown in the Actions tab.
name: Weekly Tests

on:
  # Allow a run to be started manually.
  workflow_dispatch:
  # push:
  schedule:
    # Every Friday at 20:00. GitHub evaluates cron schedules in UTC.
    - cron: "0 20 * * FRI"

env:
  # Source branch of a pull request when one exists, otherwise the branch or
  # tag name that triggered the run.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
jobs:
  # Builds the NVBit tracer and the benchmark applications, then collects
  # hardware traces and hardware stats and publishes them under the shared
  # scratch directory consumed by SASS-Weekly.
  Tracer-Weekly:
    # 720 minutes = 12 hours.
    timeout-minutes: 720
    # Run in the upstream repository, or anywhere when dispatched manually.
    if: ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
    runs-on: tgrogers-raid
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          ref: dev
      - name: Setup Environment
        run: |
          # Start from a fresh clone; the self-hosted workspace may persist.
          rm -rf env-setup
          git clone --quiet git@github.com:purdue-aalp/env-setup.git
          cd env-setup
          git checkout cluster-ubuntu
      - name: Build Tracer
        run: |
          source ./env-setup/12.8_env_setup.sh
          ./util/tracer_nvbit/install_nvbit.sh
          make clean -C ./util/tracer_nvbit/
          make -C ./util/tracer_nvbit/
      - name: build applications
        run: |
          source ./env-setup/12.8_env_setup.sh
          export PATH=/home/tgrogers-raid/a/common/python2:$PATH
          rm -rf ./gpu-app-collection/
          git clone --quiet --recurse-submodules https://github.com/accel-sim/gpu-app-collection.git
          source ./gpu-app-collection/src/setup_environment
          ln -s /home/tgrogers-raid/a/common/data_dirs ./gpu-app-collection/
          make -j8 -C ./gpu-app-collection/src rodinia_2.0-ft
          make -j8 -C ./gpu-app-collection/src rodinia-3.1
          make -j8 -C ./gpu-app-collection/src GPU_Microbenchmark
          # make -j8 -C ./gpu-app-collection/src Deepbench_nvidia
          # make -j8 -C ./gpu-app-collection/src parboil
          # make -j8 -C ./gpu-app-collection/src polybench
          # make -j8 -C ./gpu-app-collection/src cutlass
      - name: generate traces
        run: |
          source ./env-setup/12.8_env_setup.sh
          source ./gpu-app-collection/src/setup_environment
          rm -rf ./hw_run/
          # Both srun calls share the job name "gpu-lock" with
          # --dependency=singleton; presumably this serializes GPU use across
          # jobs - confirm against the cluster's Slurm configuration.
          srun --job-name=gpu-lock --dependency=singleton --partition=tgrogers-dgx -- ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
          srun --job-name=gpu-lock --dependency=singleton --partition=tgrogers-dgx -- ./util/hw_stats/run_hw.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -D 7
          # Replace the previously published traces with this run's output.
          rm -rf /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
          mkdir -p /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces
          mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run
          # ./util/tracer_nvbit/run_hw_trace.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark,parboil,polybench,cutlass_5_trace,Deepbench_nvidia_tencore,Deepbench_nvidia_normal -D 7
      - name: generate-spinlock-traces-spinlock_handling
        run: |
          source ./env-setup/12.8_env_setup.sh
          source ./gpu-app-collection/src/setup_environment
          rm -rf ./hw_run/
          mkdir -p ./hw_run/
          # Trace the Spinlock suite once per --spinlock_handling mode and
          # publish each result under its own directory.
          srun --job-name=gpu-lock --dependency=singleton --partition=tgrogers-dgx -- ./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling fast_forward
          mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_fast_forward
          mkdir -p ./hw_run/
          srun --job-name=gpu-lock --dependency=singleton --partition=tgrogers-dgx -- ./util/tracer_nvbit/run_hw_trace.py -B Spinlock -D 7 --spinlock_handling none
          mv ./hw_run /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none

  # Builds Accel-Sim against a matching gpgpu-sim branch and runs the SASS
  # simulations on the traces published by Tracer-Weekly.
  SASS-Weekly:
    # 720 minutes = 12 hours.
    timeout-minutes: 720
    needs: [Tracer-Weekly]
    if: ${{ github.repository == 'accel-sim/accel-sim-framework' || github.event_name == 'workflow_dispatch' }}
    runs-on: tgrogers-raid
    defaults:
      run:
        shell: bash
    steps:
      - uses: actions/checkout@v4
        with:
          ref: dev
      - name: Setup Environment
        run: |
          rm -rf env-setup
          git clone --quiet git@github.com:purdue-aalp/env-setup.git
          cd env-setup
          git checkout cluster-ubuntu
      - name: Build Accel-Sim
        run: |
          source ./env-setup/12.8_env_setup.sh
          rm -rf ./gpu-simulator/gpgpu-sim
          # Clone gpgpu-sim with fork-aware branch selection
          echo "Cloning gpgpu-sim with fork-aware branch selection..."
          git clone --quiet git@github.com:accel-sim/gpgpu-sim_distribution.git ./gpu-simulator/gpgpu-sim
          # Split "owner/repo" from the runner-provided environment variable
          # instead of interpolating a workflow expression into the script.
          current_owner=${GITHUB_REPOSITORY%%/*}
          current_repo=${GITHUB_REPOSITORY#*/}
          gpgpusim_repo=$(echo "$current_repo" | sed 's/accel-sim-framework/gpgpu-sim_distribution/')
          echo "Attempting to checkout branch '$BRANCH_NAME' from '$current_owner/$gpgpusim_repo'"
          # First, try to add the fork owner's repository as a remote and check if the branch exists
          if git -C ./gpu-simulator/gpgpu-sim/ remote add fork-owner "git@github.com:$current_owner/$gpgpusim_repo.git" 2>/dev/null; then
            # Ask the remote for exactly refs/heads/<branch>. --exit-code makes
            # ls-remote fail when nothing matches, so a branch that merely
            # shares a prefix (e.g. dev vs dev-foo) is not mistaken for a hit.
            if git -C ./gpu-simulator/gpgpu-sim/ ls-remote --exit-code fork-owner "refs/heads/$BRANCH_NAME" >/dev/null; then
              echo "Found branch '$BRANCH_NAME' in '$current_owner/$gpgpusim_repo' repository, checking it out"
              git -C ./gpu-simulator/gpgpu-sim/ fetch fork-owner
              git -C ./gpu-simulator/gpgpu-sim/ checkout -B "$BRANCH_NAME" "fork-owner/$BRANCH_NAME"
            else
              echo "Branch '$BRANCH_NAME' not found in '$current_owner/$gpgpusim_repo' repository, falling back to accel-sim dev branch"
              git -C ./gpu-simulator/gpgpu-sim/ checkout -B dev origin/dev
            fi
            # Remove the temporary remote
            git -C ./gpu-simulator/gpgpu-sim/ remote remove fork-owner
          else
            echo "Could not add '$current_owner/$gpgpusim_repo' remote, falling back to upstream dev branch"
            git -C ./gpu-simulator/gpgpu-sim/ checkout -B dev origin/dev
          fi
          source ./gpu-simulator/setup_environment.sh
          make clean -C gpu-simulator
          make -j -C gpu-simulator
      - name: run SASS
        run: |
          source ./env-setup/12.8_env_setup.sh
          source ./gpu-simulator/setup_environment.sh
          ln -s /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run ./hw_run
          # $$ is this step's shell PID, so both commands share one run name.
          ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,rodinia-3.1,GPU_Microbenchmark -C QV100-SASS -T ./hw_run/traces/device-7/12.8 -N weekly-$$ -M 70G
          ./util/job_launching/monitor_func_test.py -T 12 -S 300 -v -s weekly-stats-per-app.csv -N weekly-$$
      - name: test-new-traces-spinlock_handling
        # Test only fast-forwarded traces as the none one takes too long to run (~2-3 hr)
        run: |
          source ./env-setup/12.8_env_setup.sh
          source ./gpu-simulator/setup_environment.sh
          ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_fast_forward/traces/device-7/ -N spinlock-microbenchmark-$$-fast_forward
          ./util/job_launching/monitor_func_test.py -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-fast_forward
          # ./util/job_launching/run_simulations.py -B Spinlock -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/$USER/nightly-traces/hw_run_none/traces/device-7/ -N spinlock-microbenchmark-$$-none
          # ./util/job_launching/monitor_func_test.py -v -s spinlock-stats-per-app.csv -N spinlock-microbenchmark-$$-none

  # Runs the failure-notification script when either upstream job fails.
  failures:
    if: failure()
    env:
      ACTION_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      REPORT_URL: ""
    runs-on: tgrogers-raid
    needs: [Tracer-Weekly, SASS-Weekly]
    steps:
      - uses: actions/checkout@v4
      - name: Notify Failure
        run: |
          # Setup envs
          # Remove any stale clone first, as the other jobs do, so the clone
          # below cannot fail on an existing directory.
          rm -rf env-setup
          git clone --quiet --branch cluster-ubuntu git@github.com:purdue-aalp/env-setup.git
          source ./env-setup/common/common_inc.sh
          # Overrides the workflow-level BRANCH_NAME for this step only.
          export BRANCH_NAME="Weekly Tests"
          python3 .github/scripts/send_ci_email.py -t failure