Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# direnv hook: on entering this directory, make sure a uv-managed virtualenv
# exists, activate it, sync dependencies, and point PyO3 at its interpreter.
# Prerequisite: direnv (https://direnv.net/)
# Setup: sudo apt install direnv && echo 'eval "$(direnv hook bash)"' >> ~/.bashrc

# Bootstrap the virtualenv the first time through
if ! test -d .venv; then
  echo "Creating .venv with uv..."
  uv venv .venv
fi

# Load the virtualenv into the current environment
. .venv/bin/activate

# Bring installed packages in line with the project's dependency set
uv sync --quiet

# PyO3 builds should link against the virtualenv's interpreter
export PYO3_PYTHON="$PWD/.venv/bin/python"
243 changes: 243 additions & 0 deletions .github/workflows/ci-failure-analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
---
# CI Failure Analysis
#
# When the "CI" workflow finishes with a failure on a pull request, this
# workflow collects the logs of the failed jobs, asks Gemini for a root-cause
# summary, and posts (or updates) a comment on the pull request.
name: CI Failure Analysis

# Triggers when the main CI workflow completes
on:
  workflow_run:
    workflows: ["CI"]
    types: [completed]

permissions:
  contents: read
  actions: read
  pull-requests: write

jobs:
  analyze-failure:
    name: Analyze CI Failure with AI
    # Use GitHub-hosted runner for security (defense-in-depth, ephemeral environment)
    # Self-hosted runners risk secret exposure and network access from fork PRs
    runs-on: ubuntu-latest
    # Only run when CI fails AND it's a PR (not push to main)
    if: >-
      github.event.workflow_run.conclusion == 'failure' &&
      github.event.workflow_run.event == 'pull_request'
    env:
      GH_TOKEN: ${{ github.token }}
      # This job never checks out the repository, so `gh` commands that
      # normally infer the repo from a local clone need it spelled out.
      GH_REPO: ${{ github.repository }}
      # Event data (the branch name in particular is controlled by the PR
      # author) is handed to scripts through the environment and is never
      # interpolated into script text, which would allow shell injection.
      RUN_ID: ${{ github.event.workflow_run.id }}
      RUN_URL: ${{ github.event.workflow_run.html_url }}
      HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
      HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}

    steps:
      # Verify conclusion (workaround for race condition where conclusion can be empty)
      - name: Verify workflow conclusion
        id: verify
        run: |
          CONCLUSION=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}" \
            --jq '.conclusion')

          if [ "$CONCLUSION" != "failure" ]; then
            echo "Conclusion mismatch or race condition detected (got: $CONCLUSION)"
            echo "verified=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "verified=true" >> "$GITHUB_OUTPUT"
          echo "Verified: workflow actually failed"

      # Get PR number using SHA lookup (works for fork PRs too)
      # Note: github.event.workflow_run.pull_requests is EMPTY for fork PRs (known GitHub bug)
      # The step result is a JSON object: {skip: true} when no PR is found,
      # otherwise {skip: false, number, title}.
      - name: Get PR number from commit SHA
        if: steps.verify.outputs.verified == 'true'
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            const { data: prs } = await github.rest.repos.listPullRequestsAssociatedWithCommit({
              owner: context.repo.owner,
              repo: context.repo.repo,
              commit_sha: context.payload.workflow_run.head_sha
            });

            if (prs.length === 0) {
              console.log('No PR associated with this commit');
              return { skip: true };
            }

            const pr = prs[0];
            console.log(`Found PR #${pr.number}: ${pr.title}`);
            return {
              skip: false,
              number: pr.number,
              title: pr.title
            };

      # Writes /tmp/analysis-input.txt and sets has_logs=true, or sets
      # has_logs=false when the run has no failed jobs.
      - name: Fetch failed job logs
        if: >-
          steps.verify.outputs.verified == 'true' &&
          fromJSON(steps.pr.outputs.result).skip != true
        id: logs
        run: |
          set -euo pipefail

          # Get failed jobs as "<id><TAB><name>" lines. The limit of 3 (to avoid
          # token limits) is applied inside jq rather than with `head`, so no
          # pipeline stage can be cut short under `pipefail`.
          FAILED_JOBS=$(gh api \
            "/repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}/jobs?per_page=100" \
            --jq '[.jobs[] | select(.conclusion == "failure")] | .[:3] | .[] | [.id, .name] | @tsv')

          if [[ -z "$FAILED_JOBS" ]]; then
            echo "No failed jobs found"
            echo "has_logs=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Create logs directory
          mkdir -p /tmp/ci-logs

          while IFS=$'\t' read -r JOB_ID RAW_NAME; do
            # Spaces and slashes would break the log file path
            JOB_NAME=$(printf '%s' "$RAW_NAME" | tr ' /' '__')

            echo "Fetching logs for job: $JOB_NAME ($JOB_ID)"

            # Get job logs (last 500 lines to keep context manageable).
            # Best effort: a job whose logs cannot be fetched yields an empty file.
            gh api \
              "/repos/${GITHUB_REPOSITORY}/actions/jobs/${JOB_ID}/logs" \
              2>/dev/null | tail -500 > "/tmp/ci-logs/${JOB_NAME}.log" || true
          done <<< "$FAILED_JOBS"

          # Combine logs with headers
          {
            echo "=== CI FAILURE LOGS ==="
            echo "Repository: ${GITHUB_REPOSITORY}"
            echo "Run ID: ${RUN_ID}"
            echo "Commit: ${HEAD_SHA}"
            echo "Branch: ${HEAD_BRANCH}"
            echo ""

            for log in /tmp/ci-logs/*.log; do
              if [[ -f "$log" ]]; then
                echo "--- $(basename "$log" .log) ---"
                cat "$log"
                echo ""
              fi
            done
          } > /tmp/combined-logs.txt

          # Truncate to ~50KB to stay within API limits
          head -c 50000 /tmp/combined-logs.txt > /tmp/analysis-input.txt

          echo "has_logs=true" >> "$GITHUB_OUTPUT"
          echo "Logs prepared for analysis ($(wc -c < /tmp/analysis-input.txt) bytes)"

      # Always leaves a /tmp/analysis-result.md behind (real analysis or a
      # fallback message) and always sets analysis_available, so the posting
      # step never depends on a missing file.
      - name: Analyze failure with Gemini AI
        if: >-
          steps.verify.outputs.verified == 'true' &&
          fromJSON(steps.pr.outputs.result).skip != true &&
          steps.logs.outputs.has_logs == 'true'
        id: analysis
        env:
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          set -euo pipefail

          # Check if API key is configured
          if [[ -z "${GEMINI_API_KEY:-}" ]]; then
            echo "GEMINI_API_KEY not configured, creating placeholder analysis"
            cat > /tmp/analysis-result.md << 'EOF'
          **AI analysis not available** - `GEMINI_API_KEY` secret not configured.

          To enable AI-powered failure analysis:
          1. Get a free API key from https://aistudio.google.com/app/apikey
          2. Add it as `GEMINI_API_KEY` in repository Settings → Secrets → Actions
          EOF
            echo "analysis_available=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          # Prepare the analysis prompt (instructions first, logs appended below)
          cat > /tmp/prompt-instructions.txt << 'EOF'
          You are a CI/CD debugging expert analyzing GitHub Actions failures for a Rust project (ColdVox - voice-to-text application).

          Analyze these failure logs and provide:

          1. **Root Cause**: What specifically failed and why (be precise, cite error messages)
          2. **Fix Suggestion**: Concrete steps to fix the issue (code changes, config updates, etc.)
          3. **Category**: One of: build-error, test-failure, dependency-issue, configuration-error, flaky-test, infrastructure-issue

          Keep the response concise (under 400 words). Use markdown formatting.
          Focus on actionable fixes, not general advice.

          LOGS:
          EOF

          # Build the request body on disk: the escaped logs can be large, and
          # passing them as a single command-line argument risks hitting the
          # kernel's per-argument size limit.
          jq -n \
            --rawfile instructions /tmp/prompt-instructions.txt \
            --rawfile logs /tmp/analysis-input.txt \
            '{contents: [{parts: [{text: ($instructions + $logs)}]}]}' \
            > /tmp/gemini-request.json

          # Call Gemini API with retry logic (exponential backoff on rate limits)
          # Using gemini-2.5-flash (latest stable with thinking/reasoning capabilities)
          # NOT setting maxOutputTokens - let the model use its full thinking budget
          # Thinking models need room to reason; constraining output cripples them
          MAX_RETRIES=3
          RETRY_DELAY=2

          for i in $(seq 1 "$MAX_RETRIES"); do
            # The key travels in a header rather than the URL so it cannot end
            # up in logged or echoed URLs. `|| true`: a transport failure must
            # not abort the step under `set -e`; curl still prints "000" as the
            # status code and the attempt is retried like any other error.
            HTTP_RESPONSE=$(curl -s -w "\n%{http_code}" \
              "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \
              -H "Content-Type: application/json" \
              -H "x-goog-api-key: ${GEMINI_API_KEY}" \
              --data-binary @/tmp/gemini-request.json) || true

            # Last line is the status code appended by -w; the rest is the body
            HTTP_CODE="${HTTP_RESPONSE##*$'\n'}"
            BODY="${HTTP_RESPONSE%$'\n'*}"

            if [[ "$HTTP_CODE" == "200" ]]; then
              # A 200 with an unparseable body degrades to a message instead of
              # aborting the step.
              RESPONSE=$(jq -r '.candidates[0].content.parts[0].text // "Analysis failed - no response"' <<< "$BODY") \
                || RESPONSE="Analysis failed - no response"
              printf '%s\n' "$RESPONSE" > /tmp/analysis-result.md
              echo "analysis_available=true" >> "$GITHUB_OUTPUT"
              echo "Analysis complete"
              exit 0
            fi

            if [[ "$HTTP_CODE" == "429" ]]; then
              # Rate limit - exponential backoff
              WAIT_TIME=$((RETRY_DELAY * (2 ** (i - 1))))
              echo "Rate limit hit (attempt $i/$MAX_RETRIES)" >&2
            else
              WAIT_TIME=$RETRY_DELAY
              echo "API error: HTTP $HTTP_CODE" >&2
              echo "$BODY" >&2
            fi

            # No point sleeping after the final attempt
            if (( i < MAX_RETRIES )); then
              echo "Waiting ${WAIT_TIME}s before retrying..." >&2
              sleep "$WAIT_TIME"
            fi
          done

          # Every attempt failed (rate limit, API error, or network error):
          # leave a fallback message so the posting step has something to show.
          echo "API request failed after $MAX_RETRIES attempts" > /tmp/analysis-result.md
          echo "analysis_available=false" >> "$GITHUB_OUTPUT"

      # Creates one comment per analyzed CI run; a rerun of this workflow for
      # the same CI run updates that comment instead of adding another.
      - name: Post analysis to PR
        if: >-
          steps.verify.outputs.verified == 'true' &&
          fromJSON(steps.pr.outputs.result).skip != true &&
          steps.logs.outputs.has_logs == 'true'
        env:
          PR_NUMBER: ${{ fromJSON(steps.pr.outputs.result).number }}
        run: |
          # Check if we already commented on this run to avoid spam
          # (--paginate: the matching comment may not be on the first page)
          EXISTING_COMMENT=$(gh api --paginate \
            "/repos/${GITHUB_REPOSITORY}/issues/${PR_NUMBER}/comments" \
            --jq ".[] | select(.body | contains(\"CI Failure Analysis\")) | select(.body | contains(\"${RUN_ID}\")) | .id" \
            | head -1 || true)

          # Defensive default in case the analysis step left nothing behind
          if [[ -s /tmp/analysis-result.md ]]; then
            ANALYSIS=$(cat /tmp/analysis-result.md)
          else
            ANALYSIS="Analysis result was not produced."
          fi

          ANALYZER_URL="${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"

          # Build comment body. Values come from environment variables, so the
          # shell expands them once and never re-evaluates their contents.
          COMMENT_BODY="## 🤖 CI Failure Analysis

          **Workflow Run**: [#${RUN_ID}](${RUN_URL})
          **Commit**: \`${HEAD_SHA}\`
          **Branch**: \`${HEAD_BRANCH}\`

          ${ANALYSIS}

          ---
          <sub>🔧 Auto-generated by [CI Failure Analyzer](${ANALYZER_URL}) using Gemini 2.5 Flash</sub>"

          if [[ -n "$EXISTING_COMMENT" ]]; then
            echo "Updating existing comment $EXISTING_COMMENT"
            gh api \
              "/repos/${GITHUB_REPOSITORY}/issues/comments/${EXISTING_COMMENT}" \
              -X PATCH \
              -f body="$COMMENT_BODY"
          else
            echo "Creating new comment on PR #$PR_NUMBER"
            # Relies on GH_REPO from the job env: there is no local checkout
            # for gh to infer the repository from.
            gh pr comment "$PR_NUMBER" --body "$COMMENT_BODY"
          fi

          echo "✅ Analysis posted to PR #$PR_NUMBER"
Loading
Loading