# Patch summary (reviewer notes placed ahead of the first `diff --git` header, outside every hunk).
#
# Files touched:
#   1. .github/workflows/aws_gpu_benchmarks.yml - adds a "Re-run instructions" step guarded by `if: failure()`.
#   2. .github/workflows/aws_gpu_tests.yml      - adds the same step; only the wording "benchmarks" vs "tests" differs.
#   3. .github/workflows/push_aws_gpu.yml       - adds `if: github.repository == 'newton-physics/newton'` to the
#      `run-tests` job, which calls ./.github/workflows/aws_gpu_tests.yml through `uses:`.
#
# What the added step does, in order:
#   - echoes two `::error::` workflow-command lines telling the user to pick "Re-run all jobs"
#     instead of "Re-run failed jobs";
#   - appends a markdown heading, one sentence and a two-row table to the file named by "$GITHUB_STEP_SUMMARY";
#   - prints the same guidance as a plain-text banner to the job log via `cat`.
# Both heredocs use a quoted delimiter ('EOF'), so their bodies are emitted literally with no shell expansion.
# In both files the step is inserted directly ahead of the `stop-runner` job ("Stop self-hosted EC2 runner").
#
# NOTE(review): in this copy the patch's line breaks appear to have been collapsed into spaces; the hunk headers
#   (e.g. `@@ -131,6 +131,44 @@`) count lines of the original layout - confirm against the real patch before applying.
# NOTE(review): the messages say a re-run job is "queued forever"; GitHub may cancel long-queued self-hosted jobs
#   rather than wait indefinitely - TODO confirm against the self-hosted runner usage limits and reword if so.
diff --git a/.github/workflows/aws_gpu_benchmarks.yml b/.github/workflows/aws_gpu_benchmarks.yml index c95b44a7f..0cfdec5dd 100644 --- a/.github/workflows/aws_gpu_benchmarks.yml +++ b/.github/workflows/aws_gpu_benchmarks.yml @@ -131,6 +131,44 @@ jobs: uvx asv compare --split ${{ inputs.base_ref }} ${{ inputs.ref }} exit 2 + - name: Re-run instructions + if: failure() + run: | + # Create error annotations (appear at top of job summary) + echo "::error::DO NOT use 'Re-run failed jobs' - the EC2 runner no longer exists and your job will be queued forever." + echo "::error::USE 'Re-run all jobs' instead to start a fresh EC2 runner." + + # Write to job summary (appears in Summary tab) + cat >> "$GITHUB_STEP_SUMMARY" << 'EOF' + ## ⚠️ How to Re-run This Workflow + + This workflow uses **ephemeral EC2 runners** that are terminated after each run. + + | | Option | Result | + |---|--------|--------| + | ❌ | **Re-run failed jobs** | Runner no longer exists → job queued forever | + | ✅ | **Re-run all jobs** | Starts new EC2 runner → benchmarks re-run | + EOF + + # Also print to log for completeness + cat << 'EOF' + + ================================================================================ + ⚠️ IMPORTANT: HOW TO RE-RUN THIS WORKFLOW + ================================================================================ + + This workflow uses ephemeral EC2 runners that are terminated after each run. + + ❌ DO NOT select "Re-run failed jobs" + → The runner no longer exists and your job will be queued forever. + + ✅ DO select "Re-run all jobs" + → This will start a new EC2 runner and re-run the benchmarks. 
+ + ================================================================================ + + EOF + stop-runner: name: Stop self-hosted EC2 runner runs-on: ubuntu-latest diff --git a/.github/workflows/aws_gpu_tests.yml b/.github/workflows/aws_gpu_tests.yml index a7c554ca6..34becdf1d 100644 --- a/.github/workflows/aws_gpu_tests.yml +++ b/.github/workflows/aws_gpu_tests.yml @@ -118,6 +118,44 @@ jobs: flags: unittests token: ${{ secrets.CODECOV_TOKEN }} + - name: Re-run instructions + if: failure() + run: | + # Create error annotations (appear at top of job summary) + echo "::error::DO NOT use 'Re-run failed jobs' - the EC2 runner no longer exists and your job will be queued forever." + echo "::error::USE 'Re-run all jobs' instead to start a fresh EC2 runner." + + # Write to job summary (appears in Summary tab) + cat >> "$GITHUB_STEP_SUMMARY" << 'EOF' + ## ⚠️ How to Re-run This Workflow + + This workflow uses **ephemeral EC2 runners** that are terminated after each run. + + | | Option | Result | + |---|--------|--------| + | ❌ | **Re-run failed jobs** | Runner no longer exists → job queued forever | + | ✅ | **Re-run all jobs** | Starts new EC2 runner → tests re-run | + EOF + + # Also print to log for completeness + cat << 'EOF' + + ================================================================================ + ⚠️ IMPORTANT: HOW TO RE-RUN THIS WORKFLOW + ================================================================================ + + This workflow uses ephemeral EC2 runners that are terminated after each run. + + ❌ DO NOT select "Re-run failed jobs" + → The runner no longer exists and your job will be queued forever. + + ✅ DO select "Re-run all jobs" + → This will start a new EC2 runner and re-run the tests. 
+ + ================================================================================ + + EOF + stop-runner: name: Stop self-hosted EC2 runner runs-on: ubuntu-latest diff --git a/.github/workflows/push_aws_gpu.yml b/.github/workflows/push_aws_gpu.yml index d9c7f11dd..0a7170619 100644 --- a/.github/workflows/push_aws_gpu.yml +++ b/.github/workflows/push_aws_gpu.yml @@ -13,5 +13,6 @@ on: jobs: run-tests: + if: github.repository == 'newton-physics/newton' uses: ./.github/workflows/aws_gpu_tests.yml secrets: inherit