101 changes: 101 additions & 0 deletions .github/actions/mcp-eval/badges/action.yaml
@@ -0,0 +1,101 @@
name: "Generate MCP-Eval Badges"
description: "Generate badges from MCP-Eval JSON report using shields.io"
branding:
icon: award
color: green
inputs:
report-path:
description: "Path to the MCP-Eval JSON report file"
required: true
output-dir:
description: "Directory to write generated badge files (optional, for caching)"
default: "mcpeval-reports/badges"
required: false
format:
description: "Output format: svg, endpoint, or both (default: both)"
default: "both"
required: false
tests-label:
description: "Label text for the tests badge"
default: "mcp-tests"
required: false
coverage-label:
description: "Label text for the coverage badge"
default: "mcp-cov"
required: false
upload-artifacts:
description: "Upload badges as workflow artifacts"
default: "false"
required: false
artifact-name:
description: "Name for the uploaded badge artifacts"
default: "mcpeval-badges"
required: false
outputs:
tests-badge-path:
description: "Path to the generated tests badge SVG (if output-dir is set)"
value: ${{ steps.generate.outputs.tests_badge_path }}
coverage-badge-path:
description: "Path to the generated coverage badge SVG (if output-dir is set)"
value: ${{ steps.generate.outputs.coverage_badge_path }}
runs:
using: "composite"
steps:
- name: Generate badges
id: generate
shell: bash
run: |
set -euo pipefail

# Check if local script exists, otherwise fetch from upstream
if [ -f "scripts/generate_badges.py" ]; then
echo "Using local badge generation script"
SCRIPT_PATH="scripts/generate_badges.py"
else
echo "Fetching badge generation script from upstream"
# Create a temporary directory for the mcp-eval script
mkdir -p .mcp-eval-action
cd .mcp-eval-action

# Initialize git and configure sparse checkout
git init
git remote add origin https://github.com/lastmile-ai/mcp-eval.git
git config core.sparseCheckout true

# Configure sparse checkout to only get the script we need
echo "scripts/generate_badges.py" >> .git/info/sparse-checkout

# Fetch and checkout the specific file from the tip of main (not yet pinned)
# TODO: Update this to a specific tag/release when available
git fetch --depth=1 origin main
git checkout origin/main

# Move back to the workspace root
cd ..
SCRIPT_PATH=".mcp-eval-action/scripts/generate_badges.py"
fi

# Run the badge generation script with uv
uv run "$SCRIPT_PATH" \
--report "${{ inputs.report-path }}" \
--outdir "${{ inputs.output-dir }}" \
--label-tests "${{ inputs.tests-label }}" \
--label-cov "${{ inputs.coverage-label }}" \
--format "${{ inputs.format }}"

# Set output paths if badges were generated
if [ -n "${{ inputs.output-dir }}" ]; then
if [ -f "${{ inputs.output-dir }}/tests.svg" ] && [ -f "${{ inputs.output-dir }}/coverage.svg" ]; then
echo "tests_badge_path=$(realpath ${{ inputs.output-dir }}/tests.svg)" >> $GITHUB_OUTPUT
echo "coverage_badge_path=$(realpath ${{ inputs.output-dir }}/coverage.svg)" >> $GITHUB_OUTPUT
fi
fi

- name: Upload badge artifacts
if: ${{ inputs.upload-artifacts == 'true' && inputs.output-dir != '' }}
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.artifact-name }}
path: ${{ inputs.output-dir }}
retention-days: 14
if-no-files-found: warn
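
For reference, a minimal caller sketch for this composite action (the report path and label values are illustrative; the @main ref mirrors how the reusable workflow below pins it):

    - name: Generate MCP-Eval badges
      uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges@main
      with:
        # Path to the JSON report emitted by the run action (illustrative)
        report-path: mcpeval-reports/mcpeval-results.json
        output-dir: mcpeval-reports/badges
        tests-label: mcp-tests
        coverage-label: mcp-cov
        format: both
        upload-artifacts: 'true'

When format is endpoint, shields.io consumes JSON in its endpoint badge schema, so the generator presumably emits something along these lines (the message text here is hypothetical):

    {
      "schemaVersion": 1,
      "label": "mcp-tests",
      "message": "12 passed",
      "color": "brightgreen"
    }

which a README can then render via https://img.shields.io/endpoint?url=<raw-url-to-the-json>.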
83 changes: 70 additions & 13 deletions .github/workflows/mcpeval-reusable.yml
Original file line number Diff line number Diff line change
@@ -43,6 +43,10 @@ on:
required: false
type: boolean
default: false
deploy-pages-branch:
required: false
type: string
default: 'refs/heads/main'
secrets:
ANTHROPIC_API_KEY:
required: false
@@ -53,6 +57,9 @@ jobs:
run:
name: Run MCP-Eval
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write
outputs:
json: ${{ steps.mcpeval.outputs.results-json-path }}
md: ${{ steps.mcpeval.outputs.results-md-path }}
@@ -63,7 +70,7 @@

- name: Run MCP-Eval
id: mcpeval
uses: ./.github/actions/mcp-eval/run
uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/run@main
with:
python-version: ${{ inputs.python-version }}
working-directory: ${{ inputs.working-directory }}
@@ -78,21 +85,49 @@
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- name: Generate badges
run: |
set -euo pipefail
uv run scripts/generate_badges.py --report "${{ steps.mcpeval.outputs.results-json-path }}" --outdir mcpeval-reports/badges

- name: Upload badge artifacts
uses: actions/upload-artifact@v4
- name: Generate and upload badges
uses: lastmile-ai/mcp-eval/.github/actions/mcp-eval/badges@main
with:
name: mcpeval-badges
path: mcpeval-reports/badges
report-path: ${{ steps.mcpeval.outputs.results-json-path }}
output-dir: badges
format: 'both'
upload-artifacts: 'true'
artifact-name: mcpeval-badges

# Post the Markdown report as a sticky PR comment for easy review
- name: Comment PR with MCP-Eval report
if: ${{ github.event_name == 'pull_request' }}
uses: actions/github-script@v7
env:
REPORT_PATH: ${{ steps.mcpeval.outputs.results-md-path }}
with:
script: |
const fs = require('fs');
const path = process.env.REPORT_PATH;
let body = '<!-- mcpeval-report -->\n';
body += '## MCP-Eval Report\n\n';
try {
const content = fs.readFileSync(path, 'utf8');
body += content;
} catch (e) {
body += '_No report found at ' + path + '_\n';
}
const { owner, repo } = context.repo;
const issue_number = context.issue.number;

// Find existing sticky comment
const { data: comments } = await github.rest.issues.listComments({ owner, repo, issue_number, per_page: 100 });
const previous = comments.find(c => c.user.type === 'Bot' && c.body.startsWith('<!-- mcpeval-report -->'));
if (previous) {
await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body });
} else {
await github.rest.issues.createComment({ owner, repo, issue_number, body });
}

pages:
name: Publish Report to Pages
name: Publish report and badges to GitHub Pages
needs: run
if: ${{ inputs.deploy-pages }}
if: ${{ inputs.deploy-pages && github.event_name == 'push' && github.ref == inputs.deploy-pages-branch }}
runs-on: ubuntu-latest
permissions:
pages: write
@@ -101,8 +136,20 @@ jobs:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
concurrency:
group: "pages"
cancel-in-progress: false
steps:
- name: Download artifacts
- name: Checkout
uses: actions/checkout@v4

- name: Download badge artifacts
uses: actions/download-artifact@v4
with:
name: mcpeval-badges
path: badges/

- name: Download report artifacts
uses: actions/download-artifact@v4
with:
name: ${{ inputs.artifact-name }}
@@ -112,13 +159,23 @@
run: |
set -euo pipefail
mkdir -p site

# Copy badges to site
if [[ -d badges ]]; then
cp -r badges site/
fi

# Copy HTML report as index.html
if [[ -f "mcpeval-artifacts/${{ inputs.reports-dir }}/${{ inputs.html-report }}" ]]; then
cp mcpeval-artifacts/${{ inputs.reports-dir }}/${{ inputs.html-report }} site/index.html
else
file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true)
if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '<h1>No report available</h1>' > site/index.html; fi
fi

- name: Setup Pages
uses: actions/configure-pages@v5

- name: Upload Pages artifact
uses: actions/upload-pages-artifact@v3
with:
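
With deploy-pages-branch exposed as an input, a caller can publish the report from a branch other than main. A sketch of such a caller (the branch name is illustrative):

    jobs:
      mcpeval:
        uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reusable.yml@main
        with:
          deploy-pages: true
          deploy-pages-branch: 'refs/heads/release'
        permissions:
          contents: read
          pages: write
          id-token: write
          pull-requests: write
        secrets: inherit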
130 changes: 8 additions & 122 deletions .github/workflows/mcpeval.yml
@@ -10,129 +10,15 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

permissions:
contents: read
pull-requests: write

jobs:
tests:
name: Run MCP-Eval
runs-on: ubuntu-latest
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v4

# Tip: set your LLM provider keys in repo/org secrets
# Settings discovery follows mcp-agent/mcpeval configs in your repo.
- name: Run MCP-Eval (uv)
id: mcpeval
uses: ./.github/actions/mcp-eval/run
with:
python-version: "3.11"
working-directory: .
run-args: "-v"
tests: tests/
reports-dir: mcpeval-reports
json-report: mcpeval-results.json
markdown-report: mcpeval-results.md
html-report: mcpeval-results.html
artifact-name: mcpeval-artifacts
pr-comment: "true"
set-summary: "true"
upload-artifacts: "true"
env:
# Provide at least one provider key; both are supported
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- name: Generate badges
run: |
set -euo pipefail
uv run scripts/generate_badges.py --report "${{ steps.mcpeval.outputs.results-json-path }}" --outdir mcpeval-reports/badges

- name: Upload badge artifacts
uses: actions/upload-artifact@v4
with:
name: mcpeval-badges
path: mcpeval-reports/badges

# Post the Markdown report as a sticky PR comment for easy review
- name: Comment PR with MCP-Eval report
if: ${{ github.event_name == 'pull_request' }}
uses: actions/github-script@v7
env:
REPORT_PATH: ${{ steps.mcpeval.outputs.results-md-path }}
with:
script: |
const fs = require('fs');
const path = process.env.REPORT_PATH;
let body = '<!-- mcpeval-report -->\n';
body += '## MCP-Eval Report\n\n';
try {
const content = fs.readFileSync(path, 'utf8');
body += content;
} catch (e) {
body += '_No report found at ' + path + '_\n';
}
const { owner, repo } = context.repo;
const issue_number = context.issue.number;

// Find existing sticky comment
const { data: comments } = await github.rest.issues.listComments({ owner, repo, issue_number, per_page: 100 });
const previous = comments.find(c => c.user.type === 'Bot' && c.body.startsWith('<!-- mcpeval-report -->'));
if (previous) {
await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body });
} else {
await github.rest.issues.createComment({ owner, repo, issue_number, body });
}

# Optional: Publish the HTML report to GitHub Pages on main/master pushes
pages:
name: Publish Report to Pages
needs: tests
if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master') }}
runs-on: ubuntu-latest
call-mcpeval:
uses: lastmile-ai/mcp-eval/.github/workflows/mcpeval-reusable.yml@main
with:
deploy-pages: true
tests: examples/mcp_server_fetch/tests/test_simple_decorator.py
permissions:
contents: read
pages: write
id-token: write
pull-requests: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
name: mcpeval-artifacts
path: ./mcpeval-artifacts

- name: Prepare site
run: |
set -euo pipefail
mkdir -p site
# Prefer the configured HTML filename; fallback to first HTML we find
if [[ -f "mcpeval-artifacts/mcpeval-reports/mcpeval-results.html" ]]; then
cp mcpeval-artifacts/mcpeval-reports/mcpeval-results.html site/index.html
else
file=$(find mcpeval-artifacts -name "*.html" | head -n 1 || true)
if [[ -n "$file" ]]; then cp "$file" site/index.html; else echo '<h1>No report available</h1>' > site/index.html; fi
fi
# Include badges if available
if [[ -d "mcpeval-artifacts/mcpeval-reports/badges" ]]; then
mkdir -p site/badges
cp -r mcpeval-artifacts/mcpeval-reports/badges/*.svg site/badges/ || true
fi

- name: Upload Pages artifact
uses: actions/upload-pages-artifact@v3
with:
path: ./site

- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4


secrets: inherit
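
Once the pages job has deployed, the badges are served alongside the report, so a README could embed them directly (a sketch assuming the default project Pages URL; OWNER and REPO are placeholders):

    ![mcp-tests](https://OWNER.github.io/REPO/badges/tests.svg)
    ![mcp-cov](https://OWNER.github.io/REPO/badges/coverage.svg)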