GPT OSS Integration Code #4439

Workflow file for this run

.github/workflows/pre-merge.yaml at d5efa83

	# Pre-merge CI: workflow name, triggers, default token permissions and
	# concurrency settings shared by every job in this file.
	name: Basic HPU test suite

	on:
	  # NOTE(review): pull_request_target is used while jobs in this file check out
	  # github.event.pull_request.head.sha (the PR's own code). Review GitHub's
	  # guidance for this trigger before widening permissions or adding secrets.
	  pull_request_target:
	    types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
	    branches:
	      - main
	      - 'releases/**'

	  # Allow manual triggering for testing purposes
	  workflow_dispatch: {}

	permissions:
	  pull-requests: write
	  # Allows the workflow to read the status of checks like DCO
	  checks: read

	concurrency:
	  # One active run per PR (or per ref when there is no PR number in the event);
	  # a newer run cancels the one still in progress.
	  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
	  cancel-in-progress: true

	jobs:
	# Decides whether the main CI may start.
	# Outputs `run_ci` ('true'/'false') and, when blocked, a human-readable `reason`.
	# When blocked it posts the reason as a PR comment and then fails, so every job
	# that (transitively) needs this one does not run.
	gatekeeper:
	  runs-on: ubuntu-latest
	  permissions:
	    # Required to read the status of checks and PR details
	    checks: read
	    # Required to post comments on PRs
	    pull-requests: write
	  outputs:
	    # Signals whether the main CI should run
	    run_ci: ${{ steps.check_conditions.outputs.run_ci }}
	    reason: ${{ steps.check_conditions.outputs.reason }}

	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ github.event.pull_request.head.sha }}
	        clean: true # Ensure a clean workspace before checkout
	        fetch-depth: 0

	    - name: Check for other blocking conditions
	      id: check_conditions
	      env:
	        # NOTE(review): the script below does not call the gh CLI; this token
	        # looks unused here - confirm before removing.
	        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        # Event fields are passed through the environment instead of being
	        # expanded into the script text.
	        BASE_REF: ${{ github.event.pull_request.base.ref }}
	        IS_DRAFT: ${{ github.event.pull_request.draft }}
	      run: |
	        BLOCKING_REASON=""

	        # --- Check whether the PR is behind the base branch ---
	        echo "Checking if the PR is behind the base branch..."
	        if_pr_is_behind="false"
	        if [[ -n "$BASE_REF" ]]; then
	          # No --depth here: the checkout above used fetch-depth: 0, and a
	          # shallow fetch would cut the base history and distort the count.
	          git fetch origin "$BASE_REF"
	          behind_count=$(git rev-list --count "HEAD..origin/${BASE_REF}")

	          if [[ $behind_count -gt 0 ]]; then
	            echo "PR is behind by ${behind_count} commit(s)."
	            if_pr_is_behind="true"
	          else
	            echo "PR is up-to-date with the base branch."
	          fi
	        else
	          # e.g. workflow_dispatch: the event carries no pull request data.
	          echo "No pull request base branch in this event; skipping the behind-base check."
	        fi
	        # --- End of behind-base check ---

	        if [[ "$IS_DRAFT" == "true" ]]; then
	          BLOCKING_REASON="This is a Draft PR. Please mark it as 'Ready for Review' to trigger the CI."
	        elif [[ "$if_pr_is_behind" == "true" ]]; then
	          BLOCKING_REASON="Your branch is behind the base branch. Please merge or rebase to get the latest changes."
	        fi

	        if [[ -n "$BLOCKING_REASON" ]]; then
	          # Multi-line output syntax: name<<DELIMITER ... DELIMITER
	          {
	            echo "run_ci=false"
	            echo "reason<<EOF"
	            echo "$BLOCKING_REASON"
	            echo "EOF"
	          } >> "$GITHUB_OUTPUT"
	        else
	          echo "run_ci=true" >> "$GITHUB_OUTPUT"
	        fi

	        echo "Reason (if blocked): $BLOCKING_REASON"

	    - name: Post comment if CI is blocked
	      # Even if commenting fails, the next step must still run
	      continue-on-error: true
	      # Only run this step if the check failed
	      if: steps.check_conditions.outputs.run_ci == 'false'
	      uses: peter-evans/create-or-update-comment@v4
	      with:
	        issue-number: ${{ github.event.pull_request.number }}
	        body: |
	          ### 🚧 CI Blocked

	          The main CI workflow was not started for the following reason:
	          > ${{ steps.check_conditions.outputs.reason }}

	    - name: Fail the job to block downstream CI
	      if: steps.check_conditions.outputs.run_ci == 'false'
	      run: |
	        echo "Failing this job to prevent the main CI from running."
	        exit 1

	# Runs first on the self-hosted pool and publishes the name of the runner that
	# picked the job up, so all other jobs can target it through `runs-on`.
	# If the PR title contains [FIX_FOR_VLLM_LATEST] the hourly-ci pool is used,
	# otherwise the pr-ci pool.
	discover_runner:
	  needs: gatekeeper
	  runs-on: ${{ contains(github.event.pull_request.title, '[FIX_FOR_VLLM_LATEST]') && 'hourly-ci' || 'pr-ci' }}
	  outputs:
	    runner_name: ${{ steps.get_name.outputs.name }}
	  steps:
	    - name: Get runner name
	      id: get_name
	      env:
	        # Passed through the environment rather than expanded into the script
	        SELECTED_RUNNER: ${{ runner.name }}
	      run: |
	        echo "This workflow will run on: ${SELECTED_RUNNER}"
	        echo "name=${SELECTED_RUNNER}" >> "$GITHUB_OUTPUT"

	# Builds the e2e test matrix: a JSON array of function names found in
	# tests/full_tests/ci_gsm8k_tests.sh, exposed as the `matrix` output.
	discover_tests:
	  needs: discover_runner
	  # Run on the specific node selected by discover_runner
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  outputs:
	    matrix: ${{ steps.set-matrix.outputs.matrix }}
	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ github.event.pull_request.head.sha }}
	        clean: true # Ensure a clean workspace before checkout

	    - name: Discover test functions
	      id: set-matrix
	      run: |
	        # Take every line of the script that starts with 'run_', keep its first
	        # whitespace-separated field, strip a trailing '()', and pack the names
	        # into a compact JSON array for the matrix strategy.
	        # NOTE(review): there is no explicit exclusion of an aggregate
	        # 'run_all_tests' function here; a definition line starting with
	        # 'run_all_tests' would be included - confirm against the script.
	        TEST_FUNCTIONS=$( grep '^run_' ./tests/full_tests/ci_gsm8k_tests.sh | \
	                          awk '{print $1}' | \
	                          sed 's/()//' | \
	                          jq -R . | jq -s -c . )

	        echo "Discovered test matrix: $TEST_FUNCTIONS"
	        # Fail the job if no tests were found.
	        if [ "$TEST_FUNCTIONS" = "[]" ]; then
	          echo "::error::No test functions were discovered. Failing the workflow."
	          exit 1
	        fi
	        echo "matrix=$TEST_FUNCTIONS" >> "$GITHUB_OUTPUT"

	# Runs the repository's pre-commit hooks (manual stage, all files) on the PR head.
	# Starts as soon as gatekeeper passes, independently of the self-hosted jobs.
	pre-commit:
	  needs: gatekeeper
	  runs-on: ubuntu-latest
	  # The hooks executed here come from the checked-out PR head, so this job
	  # gets a read-only token instead of the workflow-level write permissions.
	  permissions:
	    contents: read
	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ github.event.pull_request.head.sha }}
	        clean: true # Ensure a clean workspace before checkout
	        # Do not leave the job token in .git/config where hook code could read it
	        persist-credentials: false

	    - name: Setup Python
	      uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
	      with:
	        python-version: "3.12"

	    - name: Add problem matchers
	      run: |
	        echo "::add-matcher::.github/workflows/matchers/actionlint.json"
	        echo "::add-matcher::.github/workflows/matchers/mypy.json"

	    - name: Run pre-commit hooks
	      uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
	      with:
	        extra_args: --all-files --hook-stage manual

	# Builds the Docker image used by all HPU test jobs.
	#   1. Resolve the vLLM commit to test against: the VLLM_STABLE_COMMIT file
	#      (current branch, then a fallback branch), or vllm-upstream/main when the
	#      PR title contains [FIX_FOR_VLLM_LATEST] or no stable commit is found.
	#   2. Resolve that ref to a commit id and expose it as output `target_commit`.
	#   3. Build the image tagged hpu-plugin-v1-test-env-pre-merge-<PR head sha>.
	pre_merge_hpu_test_build:
	  # Skipped when the PR carries the 'skip-gaudi-tests' label
	  if: >
	    !contains(github.event.pull_request.labels.*.name, 'skip-gaudi-tests')
	  needs: [pre-commit, discover_tests, discover_runner]
	  # Run on the specific node selected by discover_runner
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  permissions:
	    contents: read # Required to checkout code and read history
	  outputs:
	    target_commit: ${{ steps.checkout_vllm_upstream.outputs.TEST_VLLM_COMMIT }}
	  steps:
	    - name: Checkout repository
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ github.event.pull_request.head.sha }}
	        clean: true # Ensure a clean workspace before checkout

	    - name: Read Stable Commit from Target Branch
	      id: read_stable_commit
	      run: |
	        TARGET_FILE="VLLM_STABLE_COMMIT"
	        COMMIT_HASH=""

	        # 1. Primary Check: Look in the current branch
	        echo "INFO: Checking for '$TARGET_FILE' in the current branch..."
	        if [ -f "$TARGET_FILE" ]; then
	          COMMIT_HASH=$(tr -d '[:space:]' < "$TARGET_FILE")
	          if [ -n "$COMMIT_HASH" ]; then
	            echo "✅ Found stable commit in current branch: $COMMIT_HASH"
	          else
	            echo "::warning:: File '$TARGET_FILE' is empty in current branch. Proceeding to fallback."
	          fi
	        else
	          echo "INFO: File not found in current branch. Proceeding to fallback."
	        fi

	        # 2. Fallback Check: Look in the remote branch if the first check failed
	        if [ -z "$COMMIT_HASH" ]; then
	          TARGET_BRANCH="vllm/last-good-commit-for-vllm-gaudi"
	          echo "➡️ Fallback: Attempting to read '$TARGET_FILE' from branch '$TARGET_BRANCH'..."

	          # Best effort: a failed fetch is handled by the empty-hash check below
	          git fetch origin "$TARGET_BRANCH" --no-tags --depth=1 || true

	          # Read file from the remote branch; suppress errors if not found
	          COMMIT_HASH=$(git show "origin/$TARGET_BRANCH:$TARGET_FILE" 2>/dev/null) || COMMIT_HASH=""
	          COMMIT_HASH=$(echo "$COMMIT_HASH" | tr -d '[:space:]')

	          if [ -n "$COMMIT_HASH" ]; then
	            echo "✅ Found stable commit in fallback branch: $COMMIT_HASH"
	          else
	            # Reported but not fatal: the next step falls back to upstream main
	            echo "::error:: Fallback failed. Could not find a valid commit hash in either location."
	          fi
	        fi

	        # 3. Export the final result (possibly empty) for the next step
	        echo "VLLM_STABLE_COMMIT=$COMMIT_HASH" >> "$GITHUB_ENV"

	    - name: Determine Target Commit Based on PR Title
	      id: determine_commit
	      # The title is imported through the environment, never expanded into the script
	      env:
	        PR_TITLE_ENV: ${{ github.event.pull_request.title }}
	      run: |
	        # Default to the stable commit from the previous step
	        FINAL_TARGET="${VLLM_STABLE_COMMIT}"
	        PR_TITLE="$PR_TITLE_ENV"

	        echo "Pull Request Title: \"${PR_TITLE}\""

	        # Substring match: the flag may appear anywhere in the title, matching
	        # the contains() check that selects the runner pool.
	        if [[ "$PR_TITLE" == *"[FIX_FOR_VLLM_LATEST]"* ]]; then
	          echo "✅ Flag '[FIX_FOR_VLLM_LATEST]' found in title."
	          echo "Setting target to 'main'."
	          FINAL_TARGET="vllm-upstream/main"
	        elif [ -z "$FINAL_TARGET" ]; then
	          echo "⚠️ Stable commit was not found. Defaulting target to 'main'."
	          FINAL_TARGET="vllm-upstream/main"
	        else
	          echo "Using stable commit: ${FINAL_TARGET}"
	        fi

	        # Export the result to an environment variable for subsequent steps
	        echo "TARGET_COMMIT=${FINAL_TARGET}" >> "$GITHUB_ENV"
	        echo "commit_ref=${FINAL_TARGET}" >> "$GITHUB_OUTPUT"

	    - name: Add vLLM upstream as a remote and checkout target commit
	      id: checkout_vllm_upstream
	      run: |
	        echo "Attempting to remove remote branch if it exists..."
	        git remote remove vllm-upstream || true
	        echo "add vllm-upstream remote..."
	        git remote add vllm-upstream https://github.com/vllm-project/vllm.git
	        # TARGET_COMMIT may originate from a file in the PR, so it is read as a
	        # shell variable instead of being expanded into the script text.
	        echo "Checking out code from '${TARGET_COMMIT}'..."
	        # NOTE(review): only the last 100 commits per upstream branch are
	        # fetched; an older stable commit would make the checkout fail.
	        git fetch vllm-upstream --depth=100
	        git checkout "${TARGET_COMMIT}"
	        COMMIT_ID=$(git rev-parse HEAD)
	        echo "TEST_VLLM_COMMIT=$COMMIT_ID" >> "$GITHUB_OUTPUT"
	        echo "TEST_VLLM_COMMIT=$COMMIT_ID" >> "$GITHUB_ENV"
	        echo "Target commit ID for testing: $COMMIT_ID"

	    # The previous step left the upstream commit checked out in the working
	    # tree; restore the PR head before building the image.
	    - name: Checkout repository
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ github.event.pull_request.head.sha }}
	        clean: true # Ensure a clean workspace before checkout
	        # The build below copies the whole working tree (including .git) into
	        # the image, so the job token must not remain in .git/config.
	        persist-credentials: false

	    - name: Setup Docker environment and build image
	      # The Dockerfile is passed inline via a heredoc; consider moving it to a
	      # separate file for better readability/maintainability.
	      env:
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        echo "Attempting to build Docker image..."
	        # Ensure the base image is accessible from GitHub Actions (e.g., public registry or authenticated private registry).
	        # The heredoc is unquoted, so '$' meant for Docker must stay escaped.
	        docker build \
	          --no-cache \
	          --build-arg "VLLM_COMMIT_ARG=${TEST_VLLM_COMMIT}" \
	          -t "hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}" \
	          -f - . <<EOF
	        FROM vault.habana.ai/gaudi-docker/1.23.0/ubuntu24.04/habanalabs/pytorch-installer-2.9.0:latest

	        ARG VLLM_COMMIT_ARG

	        COPY ./ /workspace/vllm-gaudi
	        WORKDIR /workspace

	        RUN git clone https://github.com/vllm-project/vllm.git vllm
	        WORKDIR /workspace/vllm
	        RUN git checkout \$VLLM_COMMIT_ARG

	        # Pinning versions in requirements might be good practice for CI consistency
	        RUN pip install pytest pytest_asyncio pytest-timeout
	        RUN pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git

	        ENV no_proxy=localhost,127.0.0.1
	        ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=true

	        # Install build requirements except torch (provided by the base image).
	        # '/^torch/' matches the literal prefix; a bracket expression '[torch]'
	        # would drop every line starting with t, o, r, c or h.
	        RUN bash -c 'pip install -r <(sed "/^torch/d" requirements/build.txt)'
	        RUN VLLM_TARGET_DEVICE=empty pip install --no-build-isolation .

	        # install development dependencies (for testing)
	        RUN python3 -m pip install -e tests/vllm_test_utils

	        WORKDIR /workspace/vllm-gaudi
	        RUN pip install -e .

	        WORKDIR /workspace
	        # Expose the upstream tests/examples/benchmarks directly under /workspace
	        RUN ln -s /workspace/vllm/tests /workspace/tests \
	            && ln -s /workspace/vllm/examples /workspace/examples \
	            && ln -s /workspace/vllm/benchmarks /workspace/benchmarks

	        EOF
	        echo "Docker image built successfully."

	# Runs pytest over /workspace/vllm-gaudi/tests/unit_tests inside the image
	# built by pre_merge_hpu_test_build, on the runner chosen by discover_runner.
	hpu_unit_tests:
	  needs: [pre_merge_hpu_test_build, discover_runner]
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  steps:
	    - name: Run pytest in tests/unit_tests
	      env:
	        # Forwarded to the container by name (`-e HF_TOKEN`) so the secret is
	        # never written onto the docker command line.
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        # Stays 1 unless docker run reports otherwise; the EXIT trap uses it
	        EXITCODE=1
	        CONTAINER_NAME="hpu-plugin-v1-test-unit-tests-${PR_HEAD_SHA}"
	        IMAGE_NAME="hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}"
	        # Remove the container on exit (and any stale one before starting)
	        remove_docker_containers() { docker rm -f "$CONTAINER_NAME" || true; }
	        trap 'remove_docker_containers; exit $EXITCODE;' EXIT
	        remove_docker_containers

	        echo "Running HPU plugin v1 unit tests"
	        # Run inside `if` so a failure does not abort the script before the
	        # real exit code has been recorded.
	        if docker run --rm --runtime=habana --name="$CONTAINER_NAME" --network=host \
	          -e HABANA_VISIBLE_DEVICES=all \
	          -e HF_HOME=/workspace/hf_cache \
	          -e HF_TOKEN \
	          -v /mnt/hf_cache:/workspace/hf_cache \
	          "$IMAGE_NAME" \
	          /bin/bash -c "pytest -vvv --timeout=300 --durations=10 --durations-min=1.0 /workspace/vllm-gaudi/tests/unit_tests"; then
	          EXITCODE=0
	        else
	          EXITCODE=$?
	        fi
	        echo "Test script exited with code: $EXITCODE"
	# Runs run_accuracy_test.sh (announced in the log as the nixl pd tests) inside
	# the pre-merge image, after the unit tests have passed.
	hpu_pd_tests:
	  needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner]
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  steps:
	    - name: Run test scripts
	      env:
	        # Forwarded to the container by name (`-e HF_TOKEN`) so the secret is
	        # never written onto the docker command line.
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        # Stays 1 unless docker run reports otherwise; the EXIT trap uses it
	        EXITCODE=1
	        CONTAINER_NAME="hpu-plugin-v1-test-pd-tests-${PR_HEAD_SHA}"
	        IMAGE_NAME="hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}"
	        # Remove the container on exit (and any stale one before starting)
	        remove_docker_containers() { docker rm -f "$CONTAINER_NAME" || true; }
	        trap 'remove_docker_containers; exit $EXITCODE;' EXIT
	        remove_docker_containers

	        echo "Running HPU plugin v1 nixl pd tests"
	        # Run inside `if` so a failure does not abort the script before the
	        # real exit code has been recorded.
	        if docker run --rm --runtime=habana --name="$CONTAINER_NAME" --network=host \
	          --privileged \
	          -e HABANA_VISIBLE_DEVICES=all \
	          -e HF_HOME=/workspace/hf_cache \
	          -e HF_TOKEN \
	          -v /mnt/hf_cache:/workspace/hf_cache \
	          -v /mnt/wheels_cache:/tmp/wheels_cache \
	          "$IMAGE_NAME" \
	          /bin/bash -c "
	            pip install 'lm-eval[api]' &&
	            cd /workspace/vllm-gaudi/tests/unit_tests &&
	            ./run_accuracy_test.sh
	          "; then
	          EXITCODE=0
	        else
	          EXITCODE=$?
	        fi
	        echo "Test script exited with code: $EXITCODE"
	# Runs tests/full_tests/ci_perf_tests.sh inside the pre-merge image, after the
	# unit tests have passed.
	hpu_perf_tests:
	  needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner]
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  steps:
	    - name: Run test scripts
	      env:
	        # Forwarded to the container by name (`-e HF_TOKEN`) so the secret is
	        # never written onto the docker command line.
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        # Stays 1 unless docker run reports otherwise; the EXIT trap uses it
	        EXITCODE=1
	        CONTAINER_NAME="hpu-plugin-v1-test-perf-tests-${PR_HEAD_SHA}"
	        IMAGE_NAME="hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}"
	        # Remove the container on exit (and any stale one before starting)
	        remove_docker_containers() { docker rm -f "$CONTAINER_NAME" || true; }
	        trap 'remove_docker_containers; exit $EXITCODE;' EXIT
	        remove_docker_containers

	        echo "Running HPU plugin v1 perf tests"
	        # Run inside `if` so a failure does not abort the script before the
	        # real exit code has been recorded.
	        if docker run --rm --runtime=habana --name="$CONTAINER_NAME" --network=host \
	          --privileged \
	          -e HABANA_VISIBLE_DEVICES=all \
	          -e HF_TOKEN \
	          -e HF_HOME=/workspace/hf_cache \
	          -v /mnt/hf_cache:/workspace/hf_cache \
	          "$IMAGE_NAME" \
	          /bin/bash "/workspace/vllm-gaudi/tests/full_tests/ci_perf_tests.sh"; then
	          EXITCODE=0
	        else
	          EXITCODE=$?
	        fi
	        echo "Test script exited with code: $EXITCODE"
	# Runs examples/data_parallel.py (--dp-size 2 --tp-size 2) inside the pre-merge
	# image with warmup skipped and PT_HPU_LAZY_MODE=1, after the unit tests passed.
	hpu_dp_tests:
	  needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_runner]
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  steps:
	    - name: Run test scripts
	      env:
	        # Forwarded to the container by name (`-e HF_TOKEN`) so the secret is
	        # never written onto the docker command line.
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	      run: |
	        # Stays 1 unless docker run reports otherwise; the EXIT trap uses it
	        EXITCODE=1
	        CONTAINER_NAME="hpu-plugin-v1-test-dp-tests-${PR_HEAD_SHA}"
	        IMAGE_NAME="hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}"
	        # Remove the container on exit (and any stale one before starting)
	        remove_docker_containers() { docker rm -f "$CONTAINER_NAME" || true; }
	        trap 'remove_docker_containers; exit $EXITCODE;' EXIT
	        remove_docker_containers

	        echo "Running HPU plugin v1 dp tests"
	        # Run inside `if` so a failure does not abort the script before the
	        # real exit code has been recorded.
	        if docker run --rm --runtime=habana --name="$CONTAINER_NAME" --network=host \
	          --privileged \
	          -e HABANA_VISIBLE_DEVICES=all \
	          -e HF_HOME=/workspace/hf_cache \
	          -e VLLM_SKIP_WARMUP=true \
	          -e PT_HPU_LAZY_MODE=1 \
	          -e HF_TOKEN \
	          -v /mnt/hf_cache:/workspace/hf_cache \
	          "$IMAGE_NAME" \
	          /bin/bash -c "python -u /workspace/vllm-gaudi/examples/data_parallel.py --dp-size 2 --tp-size 2"; then
	          EXITCODE=0
	        else
	          EXITCODE=$?
	        fi
	        echo "Test script exited with code: $EXITCODE"
	# Runs one matrix job per test function discovered by discover_tests; each job
	# calls tests/full_tests/ci_gsm8k_tests.sh with that function name as argument.
	e2e:
	  needs: [pre_merge_hpu_test_build, hpu_unit_tests, discover_tests, discover_runner]
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  strategy:
	    # Let the remaining test functions run even if one of them fails
	    fail-fast: false
	    matrix:
	      # Dynamically populated from the output of the 'discover_tests' job
	      test_function: ${{ fromJson(needs.discover_tests.outputs.matrix) }}

	  steps:
	    - name: Run test suite - ${{ matrix.test_function }}
	      env:
	        # Forwarded to the container by name (`-e HF_TOKEN`) so the secret is
	        # never written onto the docker command line.
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
	        # The function name is parsed from a file in the PR, so it is passed as
	        # data through the environment instead of being expanded into the script.
	        TEST_FUNCTION: ${{ matrix.test_function }}
	      run: |
	        # Stays 1 unless docker run reports otherwise; the EXIT trap uses it
	        EXITCODE=1
	        CONTAINER_NAME="hpu-plugin-test-e2e-${PR_HEAD_SHA}-${TEST_FUNCTION}"
	        IMAGE_NAME="hpu-plugin-v1-test-env-pre-merge-${PR_HEAD_SHA}"
	        # Ensure the container is removed upon exit, regardless of success or failure.
	        remove_docker_containers() { docker rm -f "$CONTAINER_NAME" || true; }
	        trap 'remove_docker_containers; exit $EXITCODE;' EXIT
	        remove_docker_containers

	        echo "Running HPU plugin test: ${TEST_FUNCTION}"
	        # Run inside `if` so a failure does not abort the script before the
	        # real exit code has been recorded.
	        if docker run --rm --runtime=habana --name="$CONTAINER_NAME" --network=host \
	          --privileged \
	          -e HABANA_VISIBLE_DEVICES=all \
	          -e HF_HOME=/workspace/hf_cache \
	          -e HF_TOKEN \
	          -v /mnt/hf_cache:/workspace/hf_cache \
	          "$IMAGE_NAME" \
	          /bin/bash "/workspace/vllm-gaudi/tests/full_tests/ci_gsm8k_tests.sh" "${TEST_FUNCTION}"; then
	          EXITCODE=0
	        else
	          EXITCODE=$?
	        fi
	        echo "Test script exited with code: $EXITCODE"

	# Aggregate gate for pre-merge CI. The job does no work of its own: it only
	# passes when every job listed under `needs` has passed.
	# NOTE(review): hpu_pd_tests and hpu_dp_tests are not listed here - confirm
	# that leaving them out of the gate is intentional.
	pre_merge_hpu_test:
	  needs:
	    - hpu_unit_tests
	    - e2e
	    - hpu_perf_tests
	    - discover_runner
	  # Same node as the rest of the HPU jobs
	  runs-on: ${{ needs.discover_runner.outputs.runner_name }}
	  steps:
	    - name: Succeeded if all previous jobs passed
	      run: |
	        echo "All previous jobs passed."
	# Posts a success comment on the PR, naming the vLLM commit the tests ran
	# against. Runs after pre_merge_hpu_test and pre_merge_hpu_test_build.
	post-comment:
	  name: Post PR Comment
	  needs: [pre_merge_hpu_test, pre_merge_hpu_test_build]
	  runs-on: ubuntu-latest
	  permissions:
	    # Permissions are required on a per-job basis
	    pull-requests: write
	  steps:
	    - name: Post Comment on Success
	      if: needs.pre_merge_hpu_test.result == 'success'
	      uses: peter-evans/create-or-update-comment@v4
	      with:
	        issue-number: ${{ github.event.pull_request.number }}
	        body: |
	          ### ✅ CI Passed
	          All checks passed successfully against the following vllm commit:
	          `${{ needs.pre_merge_hpu_test_build.outputs.target_commit }}`

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

GPT OSS Integration Code #4439

Workflow file

GPT OSS Integration Code #4439

Uh oh!

Workflow file for this run