Removed deprecated RendererGL, RendererUSD and updated docs, examples #27

Workflow file for this run

.github/workflows/push_aws_gpu_tests.yml at a15584a

	name: Push Events - AWS GPU Tests

	# Workflow-wide settings. Jobs read them through the `env` context and
	# `run` steps also see them as shell environment variables.
	env:
	  # Where the test instance is launched and how it is sized.
	  AWS_REGION: "us-east-2"
	  AWS_INSTANCE_TYPE: "g6e.xlarge"
	  AWS_VOLUME_SIZE: "64"
	  AWS_VOLUME_TYPE: "gp3"
	  AWS_SECURITY_GROUP_IDS: "sg-07807c44e7f2a368a"
	  # IAM role assumed by the workflow, and the session length in seconds.
	  AWS_ROLE_ARN: "arn:aws:iam::968945269301:role/newton-physics-newton-github-actions-role-Role-GbdM0RBoT4xW"
	  AWS_ROLE_DURATION: "7200"
	  # Bucket that receives test reports and SSM command logs.
	  AWS_S3_BUCKET: "newton-github-workflow-artifacts"

	# Trigger on pushes, skipping pushes whose changed files all match one of
	# the ignored patterns below (docs, Markdown, GitLab CI config, .gitignore).
	on:
	  push:
	    paths-ignore:
	      - 'docs/**'
	      - '**.md'
	      - '.gitlab-ci.yml'
	      - '.gitignore'

	jobs:
	aws-unit-tests-push:
	  name: Run GPU Unit Tests on AWS EC2 (Push)
	  runs-on: ubuntu-latest
	  # Only the upstream repository runs this job; other repositories skip it.
	  if: ${{ github.repository == 'newton-physics/newton' }}
	  timeout-minutes: 120
	  permissions:
	    contents: read
	    # Requested for the AWS credentials step that assumes AWS_ROLE_ARN
	    # (presumably via GitHub OIDC - confirm against the role's trust policy).
	    id-token: write
	  steps:
	# Fetch the repository onto the GitHub-hosted runner.
	- uses: actions/checkout@v4
	  name: Checkout repository

	# Assume the workflow's IAM role so the `aws` CLI calls in later steps
	# are authenticated. The action is pinned to a commit SHA.
	- name: Configure AWS credentials
	  uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df
	  with:
	    role-to-assume: ${{ env.AWS_ROLE_ARN }}
	    role-duration-seconds: ${{ env.AWS_ROLE_DURATION }}
	    aws-region: ${{ env.AWS_REGION }}

	# Resolve the newest AWS Deep Learning Base GPU AMI from SSM Parameter
	# Store, launch one tagged instance from it, record the instance ID (in
	# GITHUB_ENV for later steps and in instance_id.txt for the cleanup job),
	# then block until the instance passes its status checks.
	- name: Launch EC2 instance
	  run: |
	    echo "Finding the latest AWS Deep Learning Base GPU AMI..."
	    LATEST_AMI_ID=$(aws ssm get-parameter --region ${{ env.AWS_REGION }} \
	      --name /aws/service/deeplearning/ami/x86_64/base-oss-nvidia-driver-gpu-ubuntu-24.04/latest/ami-id \
	      --query "Parameter.Value" \
	      --output text)
	    # With `--output text` a null query result is printed as "None".
	    if [[ -z "$LATEST_AMI_ID" || "$LATEST_AMI_ID" == "None" ]]; then
	      echo "❌ No AMI ID found. Exiting."
	      exit 1
	    fi

	    echo "Latest AMI ID found: $LATEST_AMI_ID"
	    echo "Launching EC2 instance..."

	    # --- Define all tags ---
	    TAGS="{Key=Name,Value=newton-github-workflow-runner},{Key=created-by,Value=github-actions-newton-role}"
	    TAGS="$TAGS,{Key=GitHub-Repository,Value=${{ github.repository }}}"
	    TAGS="$TAGS,{Key=Source-Event,Value=Push}"

	    INSTANCE_ID=$(aws ec2 run-instances \
	      --image-id "$LATEST_AMI_ID" \
	      --region ${{ env.AWS_REGION }} \
	      --instance-type ${{ env.AWS_INSTANCE_TYPE }} \
	      --security-group-ids ${{ env.AWS_SECURITY_GROUP_IDS }} \
	      --iam-instance-profile Name="NewtonEC2InstanceRole" \
	      --block-device-mappings "DeviceName=/dev/sda1,Ebs={VolumeSize=${{ env.AWS_VOLUME_SIZE }},VolumeType=${{ env.AWS_VOLUME_TYPE }}}" \
	      --tag-specifications "ResourceType=instance,Tags=[$TAGS]" \
	      --query 'Instances[0].InstanceId' \
	      --output text)

	    # Do not publish a bogus ID: later steps and the cleanup job trust it.
	    if [[ -z "$INSTANCE_ID" || "$INSTANCE_ID" == "None" ]]; then
	      echo "❌ No instance ID returned by run-instances. Exiting."
	      exit 1
	    fi

	    echo "Instance launched with ID: $INSTANCE_ID"
	    echo "INSTANCE_ID=$INSTANCE_ID" >> "$GITHUB_ENV"
	    # Written before waiting so the ID is available for cleanup even if
	    # the wait below fails.
	    echo "$INSTANCE_ID" > instance_id.txt

	    echo "Waiting for instance to be running..."
	    aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"

	# Publish instance_id.txt even when an earlier step failed; the cleanup
	# job downloads an artifact with this name to find the instance.
	- name: Upload instance ID artifact
	  uses: actions/upload-artifact@v4
	  if: ${{ always() }}
	  with:
	    path: instance_id.txt
	    name: unittest-instance-id-artifact

	# Build remote_script.sh (the commands the EC2 instance will run) and wrap
	# it into ssm_params.json for `aws ssm send-command`.
	#
	# The heredoc below is unquoted, so plain ${VAR} references are expanded
	# here on the GitHub runner, while backslash-escaped \$VAR references are
	# left for the instance to expand. No comments are added inside the
	# heredoc because its text is the remote script itself.
	#
	# SAFE_BRANCH is produced by `printf %q`, which already emits a
	# shell-escaped word. It is therefore inserted unquoted in the generated
	# `git checkout` line; wrapping it in double quotes would keep the escape
	# backslashes literally for characters such as "(" or "'".
	#
	# NOTE(review): the script checks out the branch name, i.e. the branch tip
	# at the time the instance runs, not the commit that triggered the push.
	# Confirm whether pinning to the pushed commit SHA is wanted.
	- name: Generate script to run on instance
	  env:
	    BRANCH_NAME_ENV: ${{ github.ref_name }}
	    S3_BUCKET: ${{ env.AWS_S3_BUCKET }}
	  run: |
	    # A unique key for this run to avoid file collisions
	    S3_KEY="reports/${{ github.run_id }}/${{ github.run_attempt }}"
	    SSM_LOG_PREFIX="ssm-logs/${{ github.run_id }}/${{ github.run_attempt }}"
	    SAFE_BRANCH=$(printf '%q' "${BRANCH_NAME_ENV}")

	    # Make the values available to the following steps.
	    echo "S3_KEY=${S3_KEY}" >> "$GITHUB_ENV"
	    echo "SSM_LOG_PREFIX=${SSM_LOG_PREFIX}" >> "$GITHUB_ENV"
	    echo "SAFE_BRANCH=${SAFE_BRANCH}" >> "$GITHUB_ENV"

	    cat << EOF > remote_script.sh
	    #!/bin/bash
	    set -euo pipefail

	    # These are expanded immediately on the GitHub runner
	    S3_BUCKET="${S3_BUCKET}"
	    S3_KEY="${S3_KEY}"

	    function finish {
	      # This is escaped to run on the remote instance
	      EXIT_CODE=\$?
	      echo "--- Script finished with exit code \$EXIT_CODE ---"
	      aws s3 cp /tmp/rspec.xml "s3://\${S3_BUCKET}/\${S3_KEY}/rspec.xml" 2>&1 || true
	      aws s3 cp /tmp/coverage.xml "s3://\${S3_BUCKET}/\${S3_KEY}/coverage.xml" 2>&1 || true
	    }
	    trap finish EXIT

	    echo "--- Starting sequence on EC2 instance ---"
	    date

	    # These are expanded immediately on the GitHub runner
	    REPO_URL="${{ github.server_url }}/${{ github.repository }}.git"
	    REPO_DIR="${{ github.event.repository.name }}"

	    curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/tmp/uv-installation" sh
	    source /tmp/uv-installation/env

	    # --- Cloning Repository ---
	    git clone "\$REPO_URL" "\$REPO_DIR"
	    cd "\$REPO_DIR"

	    # --- Checkout correct commit ---
	    git checkout ${SAFE_BRANCH}

	    echo "Running Newton test suite..."
	    uv run --extra dev --extra torch-cu12 -m newton.tests --junit-report-xml /tmp/rspec.xml --coverage --coverage-xml /tmp/coverage.xml --serial-fallback --failfast 2>&1
	    EOF

	    # Prepare script to be passed as SSM parameters
	    jq -n --arg script_body "$(cat remote_script.sh)" \
	      --arg timeout "$AWS_ROLE_DURATION" \
	      '{ "commands": [$script_body], "executionTimeout": [$timeout] }' > ssm_params.json

	    # Echo script for debugging
	    cat remote_script.sh

	# Send ssm_params.json to the instance through SSM Run Command, poll until
	# the command finishes (or TIMEOUT seconds elapse), download the reports
	# and the command's stdout log from S3, print the log, and fail the step
	# unless the command succeeded and both report files are present.
	#
	# INSTANCE_ID, S3_KEY and SSM_LOG_PREFIX come from GITHUB_ENV, written by
	# earlier steps.
	- name: Run script on instance
	  env:
	    S3_BUCKET: ${{ env.AWS_S3_BUCKET }}
	  run: |
	    echo "--- Sending command to instance $INSTANCE_ID via SSM ---"
	    COMMAND_ID=$(aws ssm send-command \
	      --instance-ids "$INSTANCE_ID" \
	      --document-name "AWS-RunShellScript" \
	      --comment "Running Newton GPU unit tests" \
	      --parameters file://ssm_params.json \
	      --output-s3-bucket-name "$S3_BUCKET" \
	      --output-s3-key-prefix "$SSM_LOG_PREFIX" \
	      --query "Command.CommandId" \
	      --output text)

	    echo "SSM Command ID: $COMMAND_ID"

	    echo "--- Waiting for command to complete... ---"
	    final_status="Success"
	    TIMEOUT=6600
	    INTERVAL=30
	    MAX_POLL_ERRORS=5
	    elapsed_time=0
	    poll_errors=0

	    while [ "$elapsed_time" -lt "$TIMEOUT" ]; do
	      # A failed poll (for example when the invocation is not yet visible
	      # right after send-command) must not abort the step under `bash -e`.
	      # Treat it as still pending, but give up after several consecutive
	      # failures so a persistent error does not spin until TIMEOUT.
	      if STATUS=$(aws ssm get-command-invocation \
	        --command-id "$COMMAND_ID" \
	        --instance-id "$INSTANCE_ID" \
	        --query "Status" \
	        --output text); then
	        poll_errors=0
	      else
	        poll_errors=$((poll_errors + 1))
	        if [ "$poll_errors" -ge "$MAX_POLL_ERRORS" ]; then
	          final_status="PollError"
	          echo "Could not query SSM command status after $poll_errors consecutive attempts."
	          break
	        fi
	        STATUS="Pending"
	      fi

	      if [[ "$STATUS" == "Success" ]]; then
	        break
	      elif [[ "$STATUS" == "InProgress" || "$STATUS" == "Pending" ]]; then
	        :
	      else
	        final_status="$STATUS"
	        echo "SSM command failed or returned unexpected status: $STATUS"
	        break
	      fi

	      sleep $INTERVAL
	      elapsed_time=$((elapsed_time + INTERVAL))
	      echo "Current status: $STATUS. Waited $elapsed_time seconds..."
	    done

	    if [ "$elapsed_time" -ge "$TIMEOUT" ]; then
	      final_status="Timeout"
	    fi

	    echo "--- Downloading artifacts from S3 ---"
	    # Best effort: a failed download must not prevent the log from being
	    # fetched and printed. Missing reports are caught by the final check.
	    aws s3 cp "s3://${S3_BUCKET}/${S3_KEY}/" . --recursive \
	      || echo "⚠️ Could not download test artifacts from S3."

	    # --- Get the exact S3 output URL from the command invocation ---
	    S3_STDOUT_URL=$(aws ssm get-command-invocation --command-id "$COMMAND_ID" --instance-id "$INSTANCE_ID" --query "StandardOutputUrl" --output text)

	    # With `--output text` a null value is printed as "None".
	    if [[ -z "$S3_STDOUT_URL" || "$S3_STDOUT_URL" == "None" ]]; then
	      echo "❌ Could not retrieve S3 output URL. Cannot fetch logs."
	      exit 1
	    fi

	    # Convert the https:// URL to an s3:// URI
	    BUCKET_AND_KEY=$(echo "$S3_STDOUT_URL" | sed 's|https://s3\.[^.]*\.amazonaws\.com/||')
	    S3_URI="s3://${BUCKET_AND_KEY}"

	    LOCAL_LOG_FILE="ssm_output.log"
	    echo "Downloading combined log from ${S3_URI}"

	    if ! aws s3 cp "${S3_URI}" "${LOCAL_LOG_FILE}"; then
	      echo "❌ Failed to download logs from S3."
	      exit 1
	    fi

	    echo "--- Downloaded Log Output ---"
	    cat "${LOCAL_LOG_FILE}"

	    # --- Final status check ---
	    # A successful command must also have produced both report files.
	    if [[ "$final_status" == "Success" ]]; then
	      if [ ! -f "rspec.xml" ]; then
	        final_status="MissingRspecFile"
	      elif [ ! -f "coverage.xml" ]; then
	        final_status="MissingCoverageFile"
	      fi
	    fi

	    if [[ "$final_status" != "Success" ]]; then
	      echo "❌ Workflow failed with status: $final_status"
	      exit 1
	    else
	      echo "✅ Workflow completed successfully."
	    fi

	# Render the JUnit report in the run summary, listing failures only.
	# Runs unless the workflow was cancelled, so failed runs are summarized too.
	- name: Test Summary
	  uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86
	  if: ${{ !cancelled() }}
	  with:
	    show: fail
	    paths: rspec.xml

	# Send the JUnit report to Codecov, labelled with the instance type.
	- name: Upload test results to Codecov
	  uses: codecov/test-results-action@f2dba722c67b86c6caa034178c6e4d35335f6706
	  if: ${{ !cancelled() }}
	  with:
	    token: ${{ secrets.CODECOV_TOKEN }}
	    files: ./rspec.xml
	    env_vars: AWS_INSTANCE_TYPE

	# Send the coverage report to Codecov under the `unittests` flag,
	# labelled with the instance type.
	- name: Upload coverage reports to Codecov
	  uses: codecov/codecov-action@ad3126e916f78f00edff4ed0317cf185271ccc2d
	  if: ${{ !cancelled() }}
	  with:
	    token: ${{ secrets.CODECOV_TOKEN }}
	    files: ./coverage.xml
	    flags: unittests
	    env_vars: AWS_INSTANCE_TYPE

	# Keep the reports and the SSM log as a workflow artifact. Files that were
	# never produced are skipped silently (`if-no-files-found: ignore`).
	- name: Save test artifacts
	  if: ${{ !cancelled() }}
	  uses: actions/upload-artifact@v4
	  with:
	    if-no-files-found: ignore
	    name: test-artifacts
	    # Literal block scalar: one path per line.
	    path: |
	      rspec.xml
	      coverage.xml
	      ssm_output.log

	cleanup:
	  # Kept as its own job so the instance is still terminated when the main
	  # job's runner dies unexpectedly; this avoids orphaned EC2 instances.
	  name: Cleanup EC2 Instance
	  needs: aws-unit-tests-push
	  runs-on: ubuntu-latest
	  # `always()` makes the job run regardless of the main job's result.
	  if: ${{ always() && github.repository == 'newton-physics/newton' }}
	  permissions:
	    contents: read
	    id-token: write
	  steps:
	# Fetch instance_id.txt as published by the main job. The artifact does
	# not exist when the main job never wrote the file; without
	# `continue-on-error` that would fail this job here and the terminate
	# step's "Instance ID file not found" branch could never run.
	- name: Download instance ID artifact
	  uses: actions/download-artifact@v4
	  continue-on-error: true
	  with:
	    name: unittest-instance-id-artifact
	    path: .

	# Assume the workflow's IAM role for the terminate call below. No session
	# duration is set here, so the action's default applies.
	- name: Configure AWS Credentials
	  uses: aws-actions/configure-aws-credentials@b47578312673ae6fa5b5096b330d9fbac3d116df
	  with:
	    role-to-assume: ${{ env.AWS_ROLE_ARN }}
	    aws-region: ${{ env.AWS_REGION }}

	# Terminate the instance named in instance_id.txt. A missing file or an
	# unusable ID means no instance was recorded, so the step exits cleanly.
	- name: Read instance ID and terminate EC2 Instance
	  run: |
	    if [ ! -f instance_id.txt ]; then
	      echo "Instance ID file not found. Nothing to terminate."
	      exit 0
	    fi
	    INSTANCE_ID=$(cat instance_id.txt)
	    # "None" is what `aws ... --output text` prints for a null value.
	    if [ -z "$INSTANCE_ID" ] || [ "$INSTANCE_ID" = "None" ]; then
	      echo "Instance ID is empty. Nothing to terminate."
	      exit 0
	    fi
	    echo "Terminating instance: $INSTANCE_ID"
	    aws ec2 terminate-instances --instance-ids "$INSTANCE_ID"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Removed deprecated RendererGL, RendererUSD and updated docs, examples #27

Workflow file

Removed deprecated RendererGL, RendererUSD and updated docs, examples #27

Uh oh!

Workflow file for this run