From 7db85ffaa2b3c05619238502bb0d2709633fd52b Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Thu, 4 Sep 2025 01:04:36 +0800 Subject: [PATCH 01/13] Add action to generate and upload MCP-Eval badges from JSON report --- .github/actions/mcp-eval/badges/action.yaml | 90 +++++++++++++++++++++ .github/workflows/mcpeval-reusable.yml | 15 ++-- .github/workflows/mcpeval.yml | 15 ++-- 3 files changed, 102 insertions(+), 18 deletions(-) create mode 100644 .github/actions/mcp-eval/badges/action.yaml diff --git a/.github/actions/mcp-eval/badges/action.yaml b/.github/actions/mcp-eval/badges/action.yaml new file mode 100644 index 0000000..27f001f --- /dev/null +++ b/.github/actions/mcp-eval/badges/action.yaml @@ -0,0 +1,90 @@ +name: "Generate MCP-Eval Badges" +description: "Generate badges from MCP-Eval JSON report using shields.io" +branding: + icon: award + color: green +inputs: + report-path: + description: "Path to the MCP-Eval JSON report file" + required: true + output-dir: + description: "Directory to write generated badge files (optional, for caching)" + default: "mcpeval-reports/badges" + required: false + tests-label: + description: "Label text for the tests badge" + default: "mcp-tests" + required: false + coverage-label: + description: "Label text for the coverage badge" + default: "mcp-cov" + required: false + upload-artifacts: + description: "Upload badges as workflow artifacts" + default: "false" + required: false + artifact-name: + description: "Name for the uploaded badge artifacts" + default: "mcpeval-badges" + required: false +outputs: + tests-badge-path: + description: "Path to the generated tests badge SVG (if output-dir is set)" + value: ${{ steps.generate.outputs.tests_badge_path }} + coverage-badge-path: + description: "Path to the generated coverage badge SVG (if output-dir is set)" + value: ${{ steps.generate.outputs.coverage_badge_path }} +runs: + using: "composite" + steps: + - name: Fetch badge generation script + shell: bash + run: | + # Create a 
temporary directory for the mcp-eval script + mkdir -p .mcp-eval-action + cd .mcp-eval-action + + # Initialize git and configure sparse checkout + git init + git remote add origin https://github.com/lastmile-ai/mcp-eval.git + git config core.sparseCheckout true + + # Configure sparse checkout to only get the script we need + echo "scripts/generate_badges.py" >> .git/info/sparse-checkout + + # Fetch and checkout the specific file + git fetch --depth=1 origin main + git checkout origin/main + + # Move back to the workspace root + cd .. + + - name: Generate badges + id: generate + shell: bash + run: | + set -euo pipefail + + # Run the badge generation script with uv + uv run .mcp-eval-action/scripts/generate_badges.py \ + --report "${{ inputs.report-path }}" \ + --outdir "${{ inputs.output-dir }}" \ + --label-tests "${{ inputs.tests-label }}" \ + --label-cov "${{ inputs.coverage-label }}" + + # Set output paths if badges were generated + if [ -n "${{ inputs.output-dir }}" ]; then + if [ -f "${{ inputs.output-dir }}/tests.svg" ] && [ -f "${{ inputs.output-dir }}/coverage.svg" ]; then + echo "tests_badge_path=$(realpath ${{ inputs.output-dir }}/tests.svg)" >> $GITHUB_OUTPUT + echo "coverage_badge_path=$(realpath ${{ inputs.output-dir }}/coverage.svg)" >> $GITHUB_OUTPUT + fi + fi + + - name: Upload badge artifacts + if: ${{ inputs.upload-artifacts == 'true' && inputs.output-dir != '' }} + uses: actions/upload-artifact@v4 + with: + name: ${{ inputs.artifact-name }} + path: ${{ inputs.output-dir }} + retention-days: 14 + if-no-files-found: warn \ No newline at end of file diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index 85c8387..4a7e434 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -78,16 +78,13 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - name: Generate badges - run: | - set -euo pipefail - uv run 
scripts/generate_badges.py --report "${{ steps.mcpeval.outputs.results-json-path }}" --outdir mcpeval-reports/badges - - - name: Upload badge artifacts - uses: actions/upload-artifact@v4 + - name: Generate and upload badges + uses: actions/mcp-eval/badges with: - name: mcpeval-badges - path: mcpeval-reports/badges + report-path: ${{ steps.mcpeval.outputs.results-json-path }} + output-dir: mcpeval-reports/badges + upload-artifacts: true + artifact-name: mcpeval-badges pages: name: Publish Report to Pages diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index d1c6eba..2ec935e 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -48,16 +48,13 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - name: Generate badges - run: | - set -euo pipefail - uv run scripts/generate_badges.py --report "${{ steps.mcpeval.outputs.results-json-path }}" --outdir mcpeval-reports/badges - - - name: Upload badge artifacts - uses: actions/upload-artifact@v4 + - name: Generate and upload badges + uses: actions/mcp-eval/badges with: - name: mcpeval-badges - path: mcpeval-reports/badges + report-path: ${{ steps.mcpeval.outputs.results-json-path }} + output-dir: mcpeval-reports/badges + upload-artifacts: true + artifact-name: mcpeval-badges # Post the Markdown report as a sticky PR comment for easy review - name: Comment PR with MCP-Eval report From f505baf1b9df90c9c2b9abcfa779b2a56645b5b9 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Thu, 4 Sep 2025 01:09:04 +0800 Subject: [PATCH 02/13] Fix github action path in workflows --- .github/workflows/mcpeval-reusable.yml | 2 +- .github/workflows/mcpeval.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index 4a7e434..e1905de 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -79,7 +79,7 
@@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: actions/mcp-eval/badges + uses: .github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} output-dir: mcpeval-reports/badges diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 2ec935e..719329c 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -49,7 +49,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: actions/mcp-eval/badges + uses: .github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} output-dir: mcpeval-reports/badges From 0b15ad5b0bf3d2f6940ab59e9a75d29c480590f6 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Thu, 4 Sep 2025 23:12:11 +0800 Subject: [PATCH 03/13] Enhance badge generation action and update workflows to publish badges to github pages --- .github/actions/mcp-eval/badges/action.yaml | 59 +++++++++++-------- .github/workflows/mcpeval-reusable.yml | 33 +++++++++-- .github/workflows/mcpeval.yml | 41 ++++++++++---- scripts/generate_badges.py | 63 +++++++++++++++++---- 4 files changed, 144 insertions(+), 52 deletions(-) diff --git a/.github/actions/mcp-eval/badges/action.yaml b/.github/actions/mcp-eval/badges/action.yaml index 27f001f..9152531 100644 --- a/.github/actions/mcp-eval/badges/action.yaml +++ b/.github/actions/mcp-eval/badges/action.yaml @@ -11,6 +11,10 @@ inputs: description: "Directory to write generated badge files (optional, for caching)" default: "mcpeval-reports/badges" required: false + format: + description: "Output format: svg, endpoint, or both (default: both)" + default: "both" + required: false tests-label: description: "Label text for the tests badge" default: "mcp-tests" @@ -37,40 +41,47 @@ outputs: runs: using: "composite" steps: - - name: Fetch badge generation script - shell: bash - run: | - # Create a temporary directory for the mcp-eval 
script - mkdir -p .mcp-eval-action - cd .mcp-eval-action - - # Initialize git and configure sparse checkout - git init - git remote add origin https://github.com/lastmile-ai/mcp-eval.git - git config core.sparseCheckout true - - # Configure sparse checkout to only get the script we need - echo "scripts/generate_badges.py" >> .git/info/sparse-checkout - - # Fetch and checkout the specific file - git fetch --depth=1 origin main - git checkout origin/main - - # Move back to the workspace root - cd .. - - name: Generate badges id: generate shell: bash run: | set -euo pipefail + # Check if local script exists, otherwise fetch from upstream + if [ -f "scripts/generate_badges.py" ]; then + echo "Using local badge generation script" + SCRIPT_PATH="scripts/generate_badges.py" + else + echo "Fetching badge generation script from upstream" + # Create a temporary directory for the mcp-eval script + mkdir -p .mcp-eval-action + cd .mcp-eval-action + + # Initialize git and configure sparse checkout + git init + git remote add origin https://github.com/lastmile-ai/mcp-eval.git + git config core.sparseCheckout true + + # Configure sparse checkout to only get the script we need + echo "scripts/generate_badges.py" >> .git/info/sparse-checkout + + # Fetch and checkout the specific file (pinned to a stable commit) + # TODO: Update this to a specific tag/release when available + git fetch --depth=1 origin main + git checkout origin/main + + # Move back to the workspace root + cd .. 
+ SCRIPT_PATH=".mcp-eval-action/scripts/generate_badges.py" + fi + # Run the badge generation script with uv - uv run .mcp-eval-action/scripts/generate_badges.py \ + uv run "$SCRIPT_PATH" \ --report "${{ inputs.report-path }}" \ --outdir "${{ inputs.output-dir }}" \ --label-tests "${{ inputs.tests-label }}" \ - --label-cov "${{ inputs.coverage-label }}" + --label-cov "${{ inputs.coverage-label }}" \ + --format "${{ inputs.format }}" # Set output paths if badges were generated if [ -n "${{ inputs.output-dir }}" ]; then diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index e1905de..2e07368 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -79,15 +79,16 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: .github/actions/mcp-eval/badges + uses: ./.github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} - output-dir: mcpeval-reports/badges - upload-artifacts: true + output-dir: badges + format: 'both' + upload-artifacts: 'true' artifact-name: mcpeval-badges pages: - name: Publish Report to Pages + name: Publish report and badges to GitHub Pages needs: run if: ${{ inputs.deploy-pages }} runs-on: ubuntu-latest @@ -98,8 +99,20 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} + concurrency: + group: "pages" + cancel-in-progress: false steps: - - name: Download artifacts + - name: Checkout + uses: actions/checkout@v4 + + - name: Download badge artifacts + uses: actions/download-artifact@v4 + with: + name: mcpeval-badges + path: badges/ + + - name: Download report artifacts uses: actions/download-artifact@v4 with: name: ${{ inputs.artifact-name }} @@ -109,6 +122,13 @@ jobs: run: | set -euo pipefail mkdir -p site + + # Copy badges to site + if [[ -d badges ]]; then + cp -r badges site/ + fi + + # Copy HTML report as index.html if [[ -f "mcpeval-artifacts/${{ 
inputs.reports-dir }}/${{ inputs.html-report }}" ]]; then cp mcpeval-artifacts/${{ inputs.reports-dir }}/${{ inputs.html-report }} site/index.html else @@ -116,6 +136,9 @@ jobs: if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '

No report available

' > site/index.html; fi fi + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Upload Pages artifact uses: actions/upload-pages-artifact@v3 with: diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 719329c..6a522b4 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -49,11 +49,12 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: .github/actions/mcp-eval/badges + uses: ./.github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} - output-dir: mcpeval-reports/badges - upload-artifacts: true + output-dir: badges + format: 'both' + upload-artifacts: 'true' artifact-name: mcpeval-badges # Post the Markdown report as a sticky PR comment for easy review @@ -86,9 +87,9 @@ jobs: await github.rest.issues.createComment({ owner, repo, issue_number, body }); } - # Optional: Publish the HTML report to GitHub Pages on main/master pushes + # Optional: Publish the HTML report and badges to GitHub Pages on main/master pushes pages: - name: Publish Report to Pages + name: Publish report and badges to GitHub Pages needs: tests if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') }} runs-on: ubuntu-latest @@ -99,8 +100,20 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} + concurrency: + group: "pages" + cancel-in-progress: false steps: - - name: Download artifacts + - name: Checkout + uses: actions/checkout@v4 + + - name: Download badge artifacts + uses: actions/download-artifact@v4 + with: + name: mcpeval-badges + path: badges/ + + - name: Download report artifacts uses: actions/download-artifact@v4 with: name: mcpeval-artifacts @@ -110,18 +123,22 @@ jobs: run: | set -euo pipefail mkdir -p site - # Prefer the configured HTML filename; fallback to first HTML we find + + # Copy badges to site + if [[ -d badges ]]; then + cp -r badges 
site/ + fi + + # Copy HTML report as index.html if [[ -f "mcpeval-artifacts/mcpeval-reports/mcpeval-results.html" ]]; then cp mcpeval-artifacts/mcpeval-reports/mcpeval-results.html site/index.html else file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true) if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '

No report available

' > site/index.html; fi fi - # Include badges if available - if [[ -d "mcpeval-artifacts/mcpeval-reports/badges" ]]; then - mkdir -p site/badges - cp -r mcpeval-artifacts/mcpeval-reports/badges/*.svg site/badges/ || true - fi + + - name: Setup Pages + uses: actions/configure-pages@v5 - name: Upload Pages artifact uses: actions/upload-pages-artifact@v3 diff --git a/scripts/generate_badges.py b/scripts/generate_badges.py index 0caa4fd..d5a3242 100644 --- a/scripts/generate_badges.py +++ b/scripts/generate_badges.py @@ -114,7 +114,7 @@ def make_badge(label: str, value: str, color: str) -> str: right_w = _measure_text(value) total_w = left_w + right_w # Construct an SVG similar to shields style - svg = f''' + svg = f""" @@ -133,7 +133,7 @@ def make_badge(label: str, value: str, color: str) -> str: {value} {value} -''' +""" return svg @@ -142,6 +142,13 @@ def write_text(path: Path, text: str) -> None: path.write_text(text, encoding="utf-8") +def write_endpoint_json(path: Path, label: str, message: str, color: str) -> None: + """Write a Shields.io endpoint JSON file.""" + data = {"schemaVersion": 1, "label": label, "message": message, "color": color} + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2), encoding="utf-8") + + def cli( report: str = typer.Option( ..., @@ -151,7 +158,7 @@ def cli( outdir: str = typer.Option( "mcpeval-reports/badges", "--outdir", - help="Output directory for generated SVG badges", + help="Output directory for generated badges", ), label_tests: str = typer.Option( "mcp-tests", "--label-tests", help="Label for tests badge" @@ -159,16 +166,39 @@ def cli( label_cov: str = typer.Option( "mcp-cov", "--label-cov", help="Label for coverage badge" ), + format: str = typer.Option( + "both", + "--format", + help="Output format: svg, endpoint, or both (default: both)", + ), ): report_path = Path(report) outdir_path = Path(outdir) + # Validate format option + if format not in ["svg", "endpoint", "both"]: + 
typer.echo(f"Invalid format: {format}. Must be 'svg', 'endpoint', or 'both'") + raise typer.Exit(1) + try: report_obj = load_report(report_path) except Exception: outdir_path.mkdir(parents=True, exist_ok=True) - write_text(outdir_path / "tests.svg", make_badge(label_tests, "0/0", "#9f9f9f")) - write_text(outdir_path / "coverage.svg", make_badge(label_cov, "0%", "#9f9f9f")) + # Generate fallback badges for errors + if format in ["svg", "both"]: + write_text( + outdir_path / "tests.svg", make_badge(label_tests, "0/0", "#9f9f9f") + ) + write_text( + outdir_path / "coverage.svg", make_badge(label_cov, "0%", "#9f9f9f") + ) + if format in ["endpoint", "both"]: + write_endpoint_json( + outdir_path / "mcp-tests.json", label_tests, "0/0", "#9f9f9f" + ) + write_endpoint_json( + outdir_path / "mcp-cov.json", label_cov, "0%", "#9f9f9f" + ) return passed, total, rate = compute_pass_fail(report_obj) @@ -179,12 +209,23 @@ def cli( cov_value = f"{int(round(cov_pct))}%" cov_color = _color_for_percentage(cov_pct) - write_text( - outdir_path / "tests.svg", make_badge(label_tests, tests_value, tests_color) - ) - write_text( - outdir_path / "coverage.svg", make_badge(label_cov, cov_value, cov_color) - ) + # Generate SVG badges + if format in ["svg", "both"]: + write_text( + outdir_path / "tests.svg", make_badge(label_tests, tests_value, tests_color) + ) + write_text( + outdir_path / "coverage.svg", make_badge(label_cov, cov_value, cov_color) + ) + + # Generate Shields endpoint JSON files + if format in ["endpoint", "both"]: + write_endpoint_json( + outdir_path / "mcp-tests.json", label_tests, tests_value, tests_color + ) + write_endpoint_json( + outdir_path / "mcp-cov.json", label_cov, cov_value, cov_color + ) if __name__ == "__main__": From 3ff6820e7744e8ca57d3f6f046adb127c5a92f48 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sat, 6 Sep 2025 18:23:29 +0800 Subject: [PATCH 04/13] Update github workflow in docs --- docs/ci-cd.mdx | 64 +++++++++++++----- docs/common-workflows.mdx | 136 
++++++++++++++++++++++++++------------ docs/examples.mdx | 50 ++++++-------- 3 files changed, 164 insertions(+), 86 deletions(-) diff --git a/docs/ci-cd.mdx b/docs/ci-cd.mdx index 7f0a92b..c57c992 100644 --- a/docs/ci-cd.mdx +++ b/docs/ci-cd.mdx @@ -3,50 +3,82 @@ title: "CI/CD" description: "Run mcp-eval in GitHub Actions, publish artifacts, post PR comments, and add badges." sidebarTitle: "CI/CD" icon: "truck-fast" -keywords: ["github actions","artifacts","pages","badges"] +keywords: ["github actions", "artifacts", "pages", "badges"] --- ### Run action -Use the uv‑based action to install, run, and upload artifacts. +Use the uv‑based action to install, run, and upload artifacts. For the complete workflow configuration, visit [mcpeval.yml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/workflows/mcpeval.yml). ```yaml jobs: tests: + name: Run MCP-Eval runs-on: ubuntu-latest + defaults: + run: + shell: bash steps: - - uses: actions/checkout@v4 - - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@v1 + - name: Checkout + uses: actions/checkout@v4 + + - name: Run MCP-Eval (uv) + id: mcpeval + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run with: - python-version: '3.11' + python-version: "3.11" + working-directory: . 
+ run-args: "-v" tests: tests/ - run-args: '-v --max-concurrency 4' - pr-comment: 'true' - set-summary: 'true' - upload-artifacts: 'true' + reports-dir: mcpeval-reports + json-report: mcpeval-results.json + markdown-report: mcpeval-results.md + html-report: mcpeval-results.html + artifact-name: mcpeval-artifacts + pr-comment: "true" + set-summary: "true" + upload-artifacts: "true" env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: Generate and upload badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges + with: + report-path: ${{ steps.mcpeval.outputs.results-json-path }} + output-dir: badges + format: "both" + upload-artifacts: "true" + artifact-name: mcpeval-badges ``` Sources: + - Action: [action.yaml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/actions/mcp-eval/run/action.yaml) - README: [run/README.md](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/actions/mcp-eval/run/README.md) - Workflows: [mcpeval.yml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/workflows/mcpeval.yml), [mcpeval-reusable.yml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/workflows/mcpeval-reusable.yml) -### Publish HTML via Pages +### Publish HTML and Badges via Pages + +The workflow automatically deploys both the HTML report and badges to GitHub Pages when pushing to main/master branches. After deployment, badges are accessible at `https://.github.io//badges/`. -Enable the Pages deploy job in the provided workflow. +To enable GitHub Pages deployment: + +1. Enable GitHub Pages in your repository settings +2. The workflow will automatically deploy on pushes to main/master +3. Badges and reports will be available at your Pages URL ### Badges -Artifacts include badges under `mcpeval-reports/badges`. 
Embed in README: +After deployment to GitHub Pages, reference badges using your Pages URL: ```markdown -![MCP Tests](mcpeval-reports/badges/tests.svg) -![MCP Tool Coverage](mcpeval-reports/badges/coverage.svg) +[![mcp-tests](https://img.shields.io/endpoint?url=https://YOUR_USERNAME.github.io/YOUR_REPO/badges/mcp-tests.json&cacheSeconds=300)](https://YOUR_USERNAME.github.io/YOUR_REPO/) +[![mcp-cov](https://img.shields.io/endpoint?url=https://YOUR_USERNAME.github.io/YOUR_REPO/badges/mcp-cov.json&cacheSeconds=300)](https://YOUR_USERNAME.github.io/YOUR_REPO/) ``` -{/* TODO: Add screenshots of the PR comment, the summary, and the published HTML report on Pages. */} - +For example, +[![mcp-tests](https://img.shields.io/endpoint?url=https://lastmile-ai.github.io/mcp-eval/badges/mcp-tests.json&cacheSeconds=300)](https://lastmile-ai.github.io/mcp-eval/) +[![mcp-cov](https://img.shields.io/endpoint?url=https://lastmile-ai.github.io/mcp-eval/badges/mcp-cov.json&cacheSeconds=300)](https://lastmile-ai.github.io/mcp-eval/) +{/* TODO: Add screenshots of the PR comment, the summary, and the published HTML report on Pages. 
*/} diff --git a/docs/common-workflows.mdx b/docs/common-workflows.mdx index acf2dce..8883107 100644 --- a/docs/common-workflows.mdx +++ b/docs/common-workflows.mdx @@ -300,72 +300,126 @@ Ensure your agent follows the optimal execution path: Create `.github/workflows/mcp-eval.yml`: ```yaml - name: mcp-eval Tests + name: MCP-Eval CI on: pull_request: push: branches: [main] + permissions: + contents: read + pull-requests: write + jobs: test: runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Run MCP-Eval + id: mcpeval + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@v1 + with: + python-version: '3.11' + tests: tests/ + run-args: '-v --max-concurrency 4' + pr-comment: 'true' + set-summary: 'true' + upload-artifacts: 'true' + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: Generate and upload badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges@v1 + with: + report-path: ${{ steps.mcpeval.outputs.results-json-path }} + output-dir: badges + format: 'both' + upload-artifacts: 'true' + artifact-name: mcpeval-badges + + # Deploy badges and report to GitHub Pages + pages: + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + needs: test + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + contents: read + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 + - name: Download badge artifacts + uses: actions/download-artifact@v4 with: - python-version: '3.10' + name: mcpeval-badges + path: badges/ - - name: Install dependencies - run: | - pip install mcpevals - # Or using uv (faster!): - # uv add mcpevals - # Or from your repo: - # pip install -e . 
+ - name: Download report artifacts + uses: actions/download-artifact@v4 + with: + name: mcpeval-artifacts + path: ./mcpeval-artifacts - - name: Run mcp-eval tests - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + - name: Prepare site run: | - mcp-eval run tests/ \ - --json test-reports/results.json \ - --markdown test-reports/results.md \ - --html test-reports/index.html + set -euo pipefail + mkdir -p site + + # Copy badges to site + if [[ -d badges ]]; then + cp -r badges site/ + fi + + # Copy HTML report as index.html + if [[ -f "mcpeval-artifacts/mcpeval-reports/mcpeval-results.html" ]]; then + cp mcpeval-artifacts/mcpeval-reports/mcpeval-results.html site/index.html + else + file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true) + if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '

No report available

' > site/index.html; fi + fi - - name: Upload test reports - if: always() - uses: actions/upload-artifact@v3 - with: - name: mcp-eval-reports - path: test-reports/ + - name: Setup Pages + uses: actions/configure-pages@v5 - - name: Comment PR with results - if: github.event_name == 'pull_request' - uses: actions/github-script@v6 + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 with: - script: | - const fs = require('fs'); - const markdown = fs.readFileSync('test-reports/results.md', 'utf8'); - - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: `## mcp-eval Test Results\n\n${markdown}` - }); + path: ./site + + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 ``` - - In your README.md: + + In your repository settings: + 1. Go to Settings → Pages + 2. Source: Deploy from a branch + 3. Branch: gh-pages (created automatically by the workflow) + 4. Save the settings + + Your badges and reports will be available at: + - Badges: `https://YOUR_USERNAME.github.io/YOUR_REPO/badges/` + - Report: `https://YOUR_USERNAME.github.io/YOUR_REPO/` + + + + After deploying to GitHub Pages, you may add badges to your README.md to show users your mcp-eval test and coverage status: ```markdown - ![mcp-eval Tests](https://github.com/YOUR_ORG/YOUR_REPO/actions/workflows/mcp-eval.yml/badge.svg) + [![mcp-tests](https://img.shields.io/endpoint?url=https://YOUR_USERNAME.github.io/YOUR_REPO/badges/mcp-tests.json&cacheSeconds=300)](https://YOUR_USERNAME.github.io/YOUR_REPO/) + [![mcp-cov](https://img.shields.io/endpoint?url=https://YOUR_USERNAME.github.io/YOUR_REPO/badges/mcp-cov.json&cacheSeconds=300)](https://YOUR_USERNAME.github.io/YOUR_REPO/) ``` + + These badges will automatically update after each push to main. 
diff --git a/docs/examples.mdx b/docs/examples.mdx index 8bef707..d9282ff 100644 --- a/docs/examples.mdx +++ b/docs/examples.mdx @@ -938,43 +938,35 @@ mcp-eval run examples/ \ ### CI/CD integration ```yaml -# .github/workflows/test.yml -name: Run mcp-eval Tests - -on: [push, pull_request] +name: mcp-eval PR Tests +on: + pull_request: + branches: [ "main" ] jobs: - test: + tests: + permissions: + contents: read + pull-requests: write + issues: write runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Setup Python - uses: actions/setup-python@v4 + - name: Checkout + uses: actions/checkout@v4 + - name: Run MCP-Eval + id: mcpeval + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@v0.1.0 with: python-version: '3.11' - - - name: Install dependencies - run: | - # We recommend using uv: - # uv add mcpevals - pip install mcpevals - pip install -r requirements.txt - - - name: Run tests + tests: tests/ + run-args: '-v --max-concurrency 4' + pr-comment: 'true' + set-summary: 'true' + upload-artifacts: 'true' + commit-reports: 'true' env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - run: | - mcp-eval run examples/ \ - --html test-results/report.html \ - --junit test-results/junit.xml - - - name: Upload results - uses: actions/upload-artifact@v3 - with: - name: test-results - path: test-results/ + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ``` {/* TODO: Screenshot of CI/CD test results in GitHub Actions */} From 64cb489cd1dfd304e929ab3e08692c0bb51819e0 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sat, 6 Sep 2025 19:00:46 +0800 Subject: [PATCH 05/13] Update action paths in workflows to use the repository reference --- .github/workflows/mcpeval-reusable.yml | 4 ++-- .github/workflows/mcpeval.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index 2e07368..01403ed 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ 
b/.github/workflows/mcpeval-reusable.yml @@ -63,7 +63,7 @@ jobs: - name: Run MCP-Eval id: mcpeval - uses: ./.github/actions/mcp-eval/run + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run with: python-version: ${{ inputs.python-version }} working-directory: ${{ inputs.working-directory }} @@ -79,7 +79,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: ./.github/actions/mcp-eval/badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} output-dir: badges diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 6a522b4..02bc099 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -29,7 +29,7 @@ jobs: # Settings discovery follows mcp-agent/mcpeval configs in your repo. - name: Run MCP-Eval (uv) id: mcpeval - uses: ./.github/actions/mcp-eval/run + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run with: python-version: "3.11" working-directory: . @@ -49,7 +49,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: ./.github/actions/mcp-eval/badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges with: report-path: ${{ steps.mcpeval.outputs.results-json-path }} output-dir: badges From 502d5ec70570324a8938e0fd9037d5c95a50d590 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sat, 6 Sep 2025 19:02:25 +0800 Subject: [PATCH 06/13] Update test dir in github workflow --- .github/workflows/mcpeval.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 02bc099..ba7efff 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -34,7 +34,7 @@ jobs: python-version: "3.11" working-directory: . 
run-args: "-v" - tests: tests/ + tests: examples/mcp_server_fetch/tests/ reports-dir: mcpeval-reports json-report: mcpeval-results.json markdown-report: mcpeval-results.md From f4df7d10c876cb967bca3b689c037d2f6c860ea9 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sat, 6 Sep 2025 19:02:50 +0800 Subject: [PATCH 07/13] Remove github action version --- docs/examples.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/examples.mdx b/docs/examples.mdx index d9282ff..d3539c8 100644 --- a/docs/examples.mdx +++ b/docs/examples.mdx @@ -955,7 +955,7 @@ jobs: uses: actions/checkout@v4 - name: Run MCP-Eval id: mcpeval - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@v0.1.0 + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run with: python-version: '3.11' tests: tests/ From e7e9dd0e4870f4373deb412c7306fa3f1b1be0b1 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 00:40:10 +0800 Subject: [PATCH 08/13] Add deploy-pages-branch input and update mcp-eval reusable workflow to comment PR with report. Update mcp-eval to use reusable workflow. 
--- .github/workflows/mcpeval-reusable.yml | 38 ++++++- .github/workflows/mcpeval.yml | 142 ++----------------------- 2 files changed, 43 insertions(+), 137 deletions(-) diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index 01403ed..0a3fea4 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -43,6 +43,10 @@ on: required: false type: boolean default: false + deploy-pages-branch: + required: false + type: string + default: 'refs/heads/main' secrets: ANTHROPIC_API_KEY: required: false @@ -53,6 +57,8 @@ jobs: run: name: Run MCP-Eval runs-on: ubuntu-latest + permissions: + contents: read outputs: json: ${{ steps.mcpeval.outputs.results-json-path }} md: ${{ steps.mcpeval.outputs.results-md-path }} @@ -86,11 +92,41 @@ jobs: format: 'both' upload-artifacts: 'true' artifact-name: mcpeval-badges + + # Post the Markdown report as a sticky PR comment for easy review + - name: Comment PR with MCP-Eval report + if: ${{ github.event_name == 'pull_request' }} + uses: actions/github-script@v7 + env: + REPORT_PATH: ${{ steps.mcpeval.outputs.results-md-path }} + with: + script: | + const fs = require('fs'); + const path = process.env.REPORT_PATH; + let body = '\n'; + body += '## MCP-Eval Report\n\n'; + try { + const content = fs.readFileSync(path, 'utf8'); + body += content; + } catch (e) { + body += '_No report found at ' + path + '_\n'; + } + const { owner, repo } = context.repo; + const issue_number = context.issue.number; + + // Find existing sticky comment + const { data: comments } = await github.rest.issues.listComments({ owner, repo, issue_number, per_page: 100 }); + const previous = comments.find(c => c.user.type === 'Bot' && c.body.startsWith('')); + if (previous) { + await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body }); + } else { + await github.rest.issues.createComment({ owner, repo, issue_number, body }); + } pages: name: Publish report and 
badges to GitHub Pages needs: run - if: ${{ inputs.deploy-pages }} + if: ${{ inputs.deploy-pages && github.event_name == 'push' && github.ref == inputs.deploy-pages-branch }} runs-on: ubuntu-latest permissions: pages: write diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index ba7efff..2adfdc6 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -10,143 +10,13 @@ concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true -permissions: - contents: read - pull-requests: write - jobs: - tests: - name: Run MCP-Eval - runs-on: ubuntu-latest - defaults: - run: - shell: bash - steps: - - name: Checkout - uses: actions/checkout@v4 - - # Tip: set your LLM provider keys in repo/org secrets - # Settings discovery follows mcp-agent/mcpeval configs in your repo. - - name: Run MCP-Eval (uv) - id: mcpeval - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run - with: - python-version: "3.11" - working-directory: . - run-args: "-v" - tests: examples/mcp_server_fetch/tests/ - reports-dir: mcpeval-reports - json-report: mcpeval-results.json - markdown-report: mcpeval-results.md - html-report: mcpeval-results.html - artifact-name: mcpeval-artifacts - pr-comment: "true" - set-summary: "true" - upload-artifacts: "true" - env: - # Provide at least one provider key; both are supported - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Generate and upload badges - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges - with: - report-path: ${{ steps.mcpeval.outputs.results-json-path }} - output-dir: badges - format: 'both' - upload-artifacts: 'true' - artifact-name: mcpeval-badges - - # Post the Markdown report as a sticky PR comment for easy review - - name: Comment PR with MCP-Eval report - if: ${{ github.event_name == 'pull_request' }} - uses: actions/github-script@v7 - env: - REPORT_PATH: ${{ steps.mcpeval.outputs.results-md-path }} 
- with: - script: | - const fs = require('fs'); - const path = process.env.REPORT_PATH; - let body = '\n'; - body += '## MCP-Eval Report\n\n'; - try { - const content = fs.readFileSync(path, 'utf8'); - body += content; - } catch (e) { - body += '_No report found at ' + path + '_\n'; - } - const { owner, repo } = context.repo; - const issue_number = context.issue.number; - - // Find existing sticky comment - const { data: comments } = await github.rest.issues.listComments({ owner, repo, issue_number, per_page: 100 }); - const previous = comments.find(c => c.user.type === 'Bot' && c.body.startsWith('')); - if (previous) { - await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body }); - } else { - await github.rest.issues.createComment({ owner, repo, issue_number, body }); - } - - # Optional: Publish the HTML report and badges to GitHub Pages on main/master pushes - pages: - name: Publish report and badges to GitHub Pages - needs: tests - if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') }} - runs-on: ubuntu-latest + call-mcpeval: + uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reuseable.yml + with: + deploy-pages: true permissions: + contents: read pages: write id-token: write - contents: read - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - concurrency: - group: "pages" - cancel-in-progress: false - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Download badge artifacts - uses: actions/download-artifact@v4 - with: - name: mcpeval-badges - path: badges/ - - - name: Download report artifacts - uses: actions/download-artifact@v4 - with: - name: mcpeval-artifacts - path: ./mcpeval-artifacts - - - name: Prepare site - run: | - set -euo pipefail - mkdir -p site - - # Copy badges to site - if [[ -d badges ]]; then - cp -r badges site/ - fi - - # Copy HTML report as index.html - if [[ -f 
"mcpeval-artifacts/mcpeval-reports/mcpeval-results.html" ]]; then - cp mcpeval-artifacts/mcpeval-reports/mcpeval-results.html site/index.html - else - file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true) - if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '

No report available

' > site/index.html; fi - fi - - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Upload Pages artifact - uses: actions/upload-pages-artifact@v3 - with: - path: ./site - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 - - + secrets: inherit \ No newline at end of file From 7c768d0cfa29fc79229f88bde43514679c07ff6a Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 00:54:05 +0800 Subject: [PATCH 09/13] Update github workflow in docs --- docs/ci-cd.mdx | 94 +++++++++++++++++++------------- docs/common-workflows.mdx | 112 +++++++++----------------------------- 2 files changed, 80 insertions(+), 126 deletions(-) diff --git a/docs/ci-cd.mdx b/docs/ci-cd.mdx index c57c992..3874a21 100644 --- a/docs/ci-cd.mdx +++ b/docs/ci-cd.mdx @@ -8,48 +8,64 @@ keywords: ["github actions", "artifacts", "pages", "badges"] ### Run action -Use the uv‑based action to install, run, and upload artifacts. For the complete workflow configuration, visit [mcpeval.yml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/workflows/mcpeval.yml). +The recommended approach is to use the reusable workflow which handles all the setup, testing, and deployment. For the complete workflow configuration, visit [mcpeval.yml](https://github.com/lastmile-ai/mcp-eval/blob/main/.github/workflows/mcpeval.yml). ```yaml +name: MCP-Eval CI + +on: + push: + branches: [main, master, trunk] + workflow_dispatch: + jobs: - tests: - name: Run MCP-Eval - runs-on: ubuntu-latest - defaults: - run: - shell: bash - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Run MCP-Eval (uv) - id: mcpeval - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run - with: - python-version: "3.11" - working-directory: . 
- run-args: "-v" - tests: tests/ - reports-dir: mcpeval-reports - json-report: mcpeval-results.json - markdown-report: mcpeval-results.md - html-report: mcpeval-results.html - artifact-name: mcpeval-artifacts - pr-comment: "true" - set-summary: "true" - upload-artifacts: "true" - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Generate and upload badges - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges - with: - report-path: ${{ steps.mcpeval.outputs.results-json-path }} - output-dir: badges - format: "both" - upload-artifacts: "true" - artifact-name: mcpeval-badges + call-mcpeval: + uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reusable.yml@main + with: + deploy-pages: true + permissions: + contents: read + pages: write + id-token: write + pull-requests: write + secrets: inherit +``` + +Alternatively, you can directly use the action in your workflow: + +```yaml +steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run MCP-Eval (uv) + id: mcpeval + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run + with: + python-version: "3.11" + working-directory: .
+ run-args: "-v" + tests: tests/ + reports-dir: mcpeval-reports + json-report: mcpeval-results.json + markdown-report: mcpeval-results.md + html-report: mcpeval-results.html + artifact-name: mcpeval-artifacts + pr-comment: "true" + set-summary: "true" + upload-artifacts: "true" + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + - name: Generate and upload badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges + with: + report-path: ${{ steps.mcpeval.outputs.results-json-path }} + output-dir: badges + format: "both" + upload-artifacts: "true" + artifact-name: mcpeval-badges ``` Sources: diff --git a/docs/common-workflows.mdx b/docs/common-workflows.mdx index 8883107..4641d1d 100644 --- a/docs/common-workflows.mdx +++ b/docs/common-workflows.mdx @@ -297,106 +297,44 @@ Ensure your agent follows the optimal execution path: - Create `.github/workflows/mcp-eval.yml`: + Create `.github/workflows/mcp-eval.yml` using the reusable workflow: ```yaml name: MCP-Eval CI on: - pull_request: push: - branches: [main] + branches: [main, master, trunk] + pull_request: + workflow_dispatch: - permissions: - contents: read - pull-requests: write + # Cancel redundant runs on the same ref + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Run MCP-Eval - id: mcpeval - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@v1 - with: - python-version: '3.11' - tests: tests/ - run-args: '-v --max-concurrency 4' - pr-comment: 'true' - set-summary: 'true' - upload-artifacts: 'true' - env: - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - - - name: Generate and upload badges - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges@v1 - with: - report-path: ${{ steps.mcpeval.outputs.results-json-path }} - output-dir: badges - format: 
'both' - upload-artifacts: 'true' - artifact-name: mcpeval-badges - - # Deploy badges and report to GitHub Pages - pages: - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - needs: test - runs-on: ubuntu-latest + call-mcpeval: + uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reusable.yml@main + with: + deploy-pages: true + # Optional: customize test configuration + # python-version: '3.11' + # tests: 'tests/' + # run-args: '-v --max-concurrency 4' permissions: + contents: read pages: write id-token: write - contents: read - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - steps: - - uses: actions/checkout@v4 - - - name: Download badge artifacts - uses: actions/download-artifact@v4 - with: - name: mcpeval-badges - path: badges/ - - - name: Download report artifacts - uses: actions/download-artifact@v4 - with: - name: mcpeval-artifacts - path: ./mcpeval-artifacts - - - name: Prepare site - run: | - set -euo pipefail - mkdir -p site - - # Copy badges to site - if [[ -d badges ]]; then - cp -r badges site/ - fi - - # Copy HTML report as index.html - if [[ -f "mcpeval-artifacts/mcpeval-reports/mcpeval-results.html" ]]; then - cp mcpeval-artifacts/mcpeval-reports/mcpeval-results.html site/index.html - else - file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true) - if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '

No report available

' > site/index.html; fi - fi - - - name: Setup Pages - uses: actions/configure-pages@v5 - - - name: Upload Pages artifact - uses: actions/upload-pages-artifact@v3 - with: - path: ./site - - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 + pull-requests: write + secrets: inherit ``` + + This reusable workflow automatically: + - Runs tests and generates reports + - Posts PR comments with results + - Uploads artifacts + - Deploys badges and HTML reports to GitHub Pages (on main branch)
From d696df6e2a204851c448b57dc38e19e528a60fee Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 00:54:20 +0800 Subject: [PATCH 10/13] Update github workflow permissions --- .github/workflows/mcpeval-reusable.yml | 1 + .github/workflows/mcpeval.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index 0a3fea4..f9fb252 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -59,6 +59,7 @@ jobs: runs-on: ubuntu-latest permissions: contents: read + pull-requests: write outputs: json: ${{ steps.mcpeval.outputs.results-json-path }} md: ${{ steps.mcpeval.outputs.results-md-path }} diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 2adfdc6..67372b2 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -19,4 +19,5 @@ jobs: contents: read pages: write id-token: write + pull-requests: write secrets: inherit \ No newline at end of file From f4e22c9b09f4071c253428d44411638867fc619d Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 01:14:29 +0800 Subject: [PATCH 11/13] Add a simple decorator test for docs purpose --- .../tests/test_simple_decorator.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 examples/mcp_server_fetch/tests/test_simple_decorator.py diff --git a/examples/mcp_server_fetch/tests/test_simple_decorator.py b/examples/mcp_server_fetch/tests/test_simple_decorator.py new file mode 100644 index 0000000..5daf24d --- /dev/null +++ b/examples/mcp_server_fetch/tests/test_simple_decorator.py @@ -0,0 +1,18 @@ +from mcp_eval import task, setup, Expect +from mcp_eval.session import TestAgent, TestSession + + +@setup +def configure_decorator_tests(): + pass + + +@task("basic_website_fetch") +async def basic_website_fetch(agent: TestAgent, session: TestSession): + await agent.generate_str( + "Please fetch the content from 
https://example.com and tell me what you find" + ) + await session.assert_that(Expect.tools.was_called("fetch", min_times=1)) + await session.assert_that( + Expect.tools.called_with("fetch", {"url": "https://example.com"}) + ) From e9a6b2c2eeb6485855743e29d26d13ff3f11ab38 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 01:16:07 +0800 Subject: [PATCH 12/13] Set mcpeval github workflow to run only test from specific file --- .github/workflows/mcpeval.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 67372b2..1c15d30 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -15,6 +15,7 @@ jobs: uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reuseable.yml with: deploy-pages: true + tests: examples/mcp_server_fetch/tests/test_simple_decorator.py permissions: contents: read pages: write From 7ef2bfa8d9f742e20a0bcb4bab00e3302eac7a28 Mon Sep 17 00:00:00 2001 From: StreetLamb Date: Sun, 7 Sep 2025 01:33:32 +0800 Subject: [PATCH 13/13] Update uses field in github workflow to include @ref --- .github/workflows/mcpeval-reusable.yml | 4 ++-- .github/workflows/mcpeval.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mcpeval-reusable.yml b/.github/workflows/mcpeval-reusable.yml index f9fb252..ad0613d 100644 --- a/.github/workflows/mcpeval-reusable.yml +++ b/.github/workflows/mcpeval-reusable.yml @@ -70,7 +70,7 @@ jobs: - name: Run MCP-Eval id: mcpeval - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@main with: python-version: ${{ inputs.python-version }} working-directory: ${{ inputs.working-directory }} @@ -86,7 +86,7 @@ jobs: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: Generate and upload badges - uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges + uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges@main with: report-path: ${{ 
steps.mcpeval.outputs.results-json-path }} output-dir: badges diff --git a/.github/workflows/mcpeval.yml b/.github/workflows/mcpeval.yml index 1c15d30..b50e689 100644 --- a/.github/workflows/mcpeval.yml +++ b/.github/workflows/mcpeval.yml @@ -12,7 +12,7 @@ concurrency: jobs: call-mcpeval: - uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reuseable.yml + uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reusable.yml@main with: deploy-pages: true tests: examples/mcp_server_fetch/tests/test_simple_decorator.py