Simulation Tests (Nightly) #220

Workflow file for this run

.github/workflows/simulation-nightly.yml at 7655eb1

	name: Simulation Tests (Nightly)

	# Home for the simulation suites that are too slow or too large for PR CI.
	# The medium-scale fdev tests (50 nodes, fault tolerance, high latency, churn
	# resilience) now run in the main CI workflow, so this workflow covers:
	# - Nightly-gated nextest tests (1h virtual time, 250-contract scale)
	# - Large scale (500 nodes, 10000 events)

	on:
	  # Scheduled run: every day at 03:00 UTC.
	  schedule:
	    - cron: '0 3 * * *'
	  # Manual run, available from any branch.
	  workflow_dispatch:
	    inputs:
	      # Optional override for what gets checked out.
	      ref:
	        description: "Git ref to checkout (branch, tag, or SHA). Leave empty to use the branch selected in the UI."
	        default: ''
	        required: false
	      # Optional override for the simulation seed.
	      seed:
	        description: "Simulation seed for reproducibility (hex, e.g., 0xDEADBEEF)"
	        default: ''
	        required: false

	# One run at a time per workflow + ref: a newly triggered run cancels the
	# run that is still in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	jobs:
	  # Builds fdev, then runs the medium-scale (alternate seed), nightly-gated
	  # nextest, and 500-node simulation steps in sequence on one runner.
	  large-scale-simulation:
	    name: Large Scale Simulation
	    runs-on: ubicloud-standard-16
	    timeout-minutes: 150

	    env:
	      RUST_LOG: info,turmoil=warn
	      CARGO_TARGET_DIR: ${{ github.workspace }}/target
	      RUST_MIN_STACK: 16777216
	      # Use mold linker to avoid rust-lld crashes
	      CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER: clang
	      CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: -C link-arg=-fuse-ld=mold
	      # Simulation seed - use input if provided, otherwise use fixed seed for reproducibility.
	      # NOTE(review): the fdev steps below pin their own --seed values, so this
	      # variable only reaches code that reads SIMULATION_SEED from the
	      # environment - confirm that the nextest suite actually does.
	      SIMULATION_SEED: ${{ github.event.inputs.seed || '0xDEADBEEF' }}

	    steps:
	      # Check out the requested ref, falling back to the ref that triggered the run.
	      - uses: actions/checkout@v6
	        with:
	          ref: ${{ github.event.inputs.ref || github.ref }}

	      # Log what was checked out; the first rev-parse falls back to the bare
	      # commit if --abbrev-ref fails.
	      - name: Show checked out ref
	        run: |
	          echo "Checked out ref: $(git rev-parse --abbrev-ref HEAD || git rev-parse HEAD)"
	          echo "Commit: $(git rev-parse HEAD)"

	      - uses: dtolnay/rust-toolchain@stable
	        with:
	          toolchain: 1.93.0

	      # The cache is only written back from main (save-if).
	      - uses: Swatinem/rust-cache@v2
	        with:
	          prefix-key: simulation-large
	          save-if: ${{ github.ref == 'refs/heads/main' }}

	      # Needed by the -fuse-ld=mold link argument configured in env above.
	      - name: Install mold linker
	        run: |
	          sudo apt-get update
	          sudo apt-get install -y mold

	      # Download the nextest archive and unpack it into cargo's bin directory.
	      - name: Install nextest
	        run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin

	      # Best-effort cleanup: errors are discarded and the step always succeeds.
	      - name: Clean test directories
	        run: |
	          rm -rf /tmp/freenet /tmp/freenet-* 2>/dev/null || true

	      # Build fdev for simulation tests
	      - name: Build fdev
	        run: cargo build -p fdev --release

	      # Medium scale with alternate seed (explores different code paths).
	      # The primary seed (0xDEADBEEF) runs in PR CI; this covers a second path.
	      - name: Medium scale test (50 nodes, seed 0xCAFEBABE)
	        run: |
	          target/release/fdev test \
	            --name "nightly-medium-50-alt" \
	            --seed 0xCAFEBABE \
	            --gateways 4 --nodes 46 --events 2000 \
	            --ring-max-htl 12 --max-connections 20 --min-connections 6 \
	            --latency-min 10 --latency-max 50 \
	            --min-success-rate 1.0 \
	            --print-summary --print-network-stats \
	            single-process

	      # Nightly-gated simulation tests (long-running, high-scale regression tests)
	      # Uses Turmoil deterministic scheduler for reproducible results
	      # Includes:
	      # - test_long_running_deterministic: 1 hour virtual time
	      # - test_subscription_renewal_at_scale: 250 contracts regression test
	      # NOTE: Gated by nightly_tests feature — does NOT run in regular CI
	      - name: Nightly simulation tests (long-running + high-scale)
	        run: |
	          echo "Running all nightly simulation tests (gated by nightly_tests feature)"
	          cargo nextest run -p freenet \
	            --features "simulation_tests,testing,nightly_tests" \
	            --test simulation_integration \
	            --test-threads 1 \
	            --no-capture \
	            --profile nightly

	      # Large scale test (500 nodes) - uses direct runner (no turmoil overhead)
	      # With simulation-mode timer optimizations (~5x fewer timer firings),
	      # completes in ~4 min locally for 10000 events.
	      # This step has its own 60-minute cap and lowers RUST_LOG to warn.
	      - name: Large scale test (500 nodes)
	        timeout-minutes: 60
	        env:
	          RUST_LOG: warn
	        run: |
	          echo "Running large scale simulation (500 nodes, 10000 events)"
	          target/release/fdev test \
	            --name "nightly-large-500" \
	            --seed 0x500BEEF \
	            --gateways 10 \
	            --nodes 490 \
	            --events 10000 \
	            --ring-max-htl 15 \
	            --max-connections 30 \
	            --min-connections 10 \
	            --latency-min 10 \
	            --latency-max 50 \
	            --min-success-rate 1.0 \
	            --print-summary \
	            --print-network-stats \
	            single-process

	      # Real-process soak test - disabled until the test runs properly
	      # TODO: Re-enable once large_network test is stable
	      # - name: Run large network soak test (50+ real nodes)
	      #   run: |
	      #     echo "Running large network soak test with seed: $SIMULATION_SEED"
	      #     if command -v riverctl &> /dev/null; then
	      #       cargo nextest run -p freenet --test large_network \
	      #         -E 'test(large_network_soak)' \
	      #         --test-threads 1 \
	      #         --no-capture \
	      #         --profile nightly
	      #     else
	      #       echo "Skipping large_network test - riverctl not installed"
	      #     fi

	# Notify the Freenet River room on failure.
	#
	# This used to post to Matrix via a direct curl against the matrix.org REST
	# API using a MATRIX_ACCESS_TOKEN secret. That token expired at some point
	# and every nightly failure since 2026-04-07 was also failing to deliver
	# the notification (HTTP 401 M_UNKNOWN_TOKEN). Rather than rotate another
	# standalone token, consolidate on the same transport
	# `river_pr_merge_notify.yml` already uses: `riverctl` against the Freenet
	# gateway, authenticated with the shared `RIVER_SIGNING_KEY` secret. One
	# set of credentials to maintain, and the message ends up in the same
	# Freenet room subscribers are already watching.
	#
	# The job runs only when large-scale-simulation itself failed, and is
	# best-effort: the send script always exits 0 and continue-on-error is set.
	  notify-failure:
	    name: Notify River on Failure
	    runs-on: ubuntu-latest
	    timeout-minutes: 20
	    needs: large-scale-simulation
	    if: failure() && needs.large-scale-simulation.result == 'failure'
	    continue-on-error: true
	    steps:
	      - name: Install Rust
	        uses: dtolnay/rust-toolchain@stable
	        with:
	          toolchain: 1.93.0

	      - name: Install riverctl
	        run: cargo install riverctl

	      - name: Send failure message to River (with exponential backoff)
	        env:
	          RIVER_SIGNING_KEY: ${{ secrets.RIVER_SIGNING_KEY }}
	          # User-controlled values (the seed input) and secrets are passed as
	          # environment variables instead of being expanded into the script
	          # text, so their contents can never be parsed as shell syntax.
	          # The NOTIFY_ prefix avoids colliding with names riverctl may read.
	          NOTIFY_SEED: ${{ github.event.inputs.seed || '0xDEADBEEF' }}
	          NOTIFY_NODE_URL: ${{ secrets.RIVER_GATEWAY_URL }}
	          NOTIFY_ROOM_ID: ${{ secrets.RIVER_ROOM_ID }}
	        run: |
	          # Repository, branch, commit and run URL come from the runner's
	          # default GITHUB_* variables (same values as the github.* context).
	          MESSAGE="🚨 Nightly Simulation Tests Failed - ${GITHUB_REPOSITORY} - branch: ${GITHUB_REF_NAME} - commit: ${GITHUB_SHA} - seed: ${NOTIFY_SEED} - ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
	          NODE_URL="${NOTIFY_NODE_URL}"
	          ROOM_ID="${NOTIFY_ROOM_ID}"

	          # Up to 7 attempts with the delay doubling from 10s (10, 20, 40,
	          # 80, 160, 320): at most 630s of sleeping, inside the 20-minute
	          # job timeout.
	          max_attempts=7
	          delay=10
	          for attempt in $(seq 1 $max_attempts); do
	            echo "Attempt $attempt/$max_attempts (delay: ${delay}s)..."
	            if riverctl --node-url "$NODE_URL" message send "$ROOM_ID" "$MESSAGE" 2>&1; then
	              echo "Message sent successfully on attempt $attempt"
	              exit 0
	            fi
	            # No sleep after the final attempt.
	            if [ "$attempt" -lt "$max_attempts" ]; then
	              echo "Failed, retrying in ${delay}s..."
	              sleep "$delay"
	              delay=$((delay * 2))
	            fi
	          done
	          # Delivery failure is surfaced as a warning annotation only.
	          echo "::warning::Failed to send River notification after $max_attempts attempts (gateway may be restarting)"
	          exit 0

	# Mirror the nightly-failure notification into the private freenet-dev room
	# (the existing notify-failure job above posts to the public official room).
	# Runs only when large-scale-simulation itself failed; continue-on-error
	# keeps a failed notification from failing the workflow run.
	  notify-dev-room-failure:
	    name: Notify dev room on Failure
	    runs-on: ubuntu-latest
	    needs: large-scale-simulation
	    if: failure() && needs.large-scale-simulation.result == 'failure'
	    continue-on-error: true
	    steps:
	      # The action below is referenced by a repository-relative path, so the
	      # repository has to be checked out first.
	      - uses: actions/checkout@v6
	      - uses: ./.github/actions/river-dev-notify
	        with:
	          # The seed falls back to 0xDEADBEEF when no input was supplied
	          # (for example on scheduled runs).
	          message: "🚨 Nightly simulation tests failed — ${{ github.repository }} — branch ${{ github.ref_name }} — commit ${{ github.sha }} — seed ${{ github.event.inputs.seed || '0xDEADBEEF' }} — ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	          bot-config: ${{ secrets.RIVER_DEV_BOT_CONFIG }}
	          room-id: ${{ secrets.RIVER_DEV_ROOM_ID }}
	          gateway-url: ${{ secrets.RIVER_GATEWAY_URL }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Simulation Tests (Nightly) #220

Workflow file

Simulation Tests (Nightly) #220

Uh oh!

Workflow file for this run