diff --git a/.gitignore b/.gitignore index 249a910f5f..c6383d75b3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,13 @@ CLAUDE.local.md .env .idea/ .agents/ -.worktrees/ \ No newline at end of file +.worktrees/ + +# Additional ignores +.devcontainer/ +.claude/ +.agentic-qe/ +.claude-flow/ +.mcp.json +*.db +node_modules/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index f4876c26ad..eb538b0e34 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1 +1,164 @@ -See [AGENTS.md](AGENTS.md) +# For Superplane project instructions See [AGENTS.md](AGENTS.md) + +## Agentic QE v3 + +This project uses **Agentic QE v3** - a Domain-Driven Quality Engineering platform with 13 bounded contexts, ReasoningBank learning, HNSW vector search, and Agent Teams coordination (ADR-064). + +--- + +### CRITICAL POLICIES + +#### Integrity Rule (ABSOLUTE) +- NO shortcuts, fake data, or false claims +- ALWAYS implement properly, verify before claiming success +- ALWAYS use real database queries for integration tests +- ALWAYS run actual tests, not assume they pass + +**We value the quality we deliver to our users.** + +#### Test Execution +- NEVER run `npm test` without `--run` flag (watch mode risk) +- Use: `npm test -- --run`, `npm run test:unit`, `npm run test:integration` when available + +#### Data Protection +- NEVER run `rm -f` on `.agentic-qe/` or `*.db` files without confirmation +- ALWAYS backup before database operations + +#### Git Operations +- NEVER auto-commit/push without explicit user request +- ALWAYS wait for user confirmation before git operations + +--- + +### Quick Reference + +```bash +# Run tests +npm test -- --run + +# Check quality +aqe quality assess + +# Generate tests +aqe test generate + +# Coverage analysis +aqe coverage +``` + +### Using AQE MCP Tools + +AQE exposes tools via MCP with the `mcp__agentic-qe__` prefix. You MUST call `fleet_init` before any other tool. + +#### 1. 
Initialize the Fleet (required first step) + +```typescript +mcp__agentic-qe__fleet_init({ + topology: "hierarchical", + maxAgents: 15, + memoryBackend: "hybrid" +}) +``` + +#### 2. Generate Tests + +```typescript +mcp__agentic-qe__test_generate_enhanced({ + targetPath: "src/services/auth.ts", + framework: "vitest", + strategy: "boundary-value" +}) +``` + +#### 3. Analyze Coverage + +```typescript +mcp__agentic-qe__coverage_analyze_sublinear({ + paths: ["src/"], + threshold: 80 +}) +``` + +#### 4. Assess Quality + +```typescript +mcp__agentic-qe__quality_assess({ + scope: "full", + includeMetrics: true +}) +``` + +#### 5. Store and Query Patterns (with learning persistence) + +```typescript +// Store a learned pattern +mcp__agentic-qe__memory_store({ + key: "patterns/coverage-gap/{timestamp}", + namespace: "learning", + value: { + pattern: "...", + confidence: 0.95, + type: "coverage-gap", + metadata: { /* domain-specific */ } + }, + persist: true +}) + +// Query stored patterns +mcp__agentic-qe__memory_query({ + pattern: "patterns/*", + namespace: "learning", + limit: 10 +}) +``` + +#### 6. 
Orchestrate Multi-Agent Tasks + +```typescript +mcp__agentic-qe__task_orchestrate({ + task: "Full quality assessment of auth module", + domains: ["test-generation", "coverage-analysis", "security-compliance"], + parallel: true +}) +``` + +### MCP Tool Reference + +| Tool | Description | +|------|-------------| +| `fleet_init` | Initialize QE fleet (MUST call first) | +| `fleet_status` | Get fleet health and agent status | +| `agent_spawn` | Spawn specialized QE agent | +| `test_generate_enhanced` | AI-powered test generation | +| `test_execute_parallel` | Parallel test execution with retry | +| `task_orchestrate` | Orchestrate multi-agent QE tasks | +| `coverage_analyze_sublinear` | O(log n) coverage analysis | +| `quality_assess` | Quality gate evaluation | +| `memory_store` | Store patterns with namespace + persist | +| `memory_query` | Query patterns by namespace/pattern | +| `security_scan_comprehensive` | SAST/DAST scanning | + +### Configuration + +- **Enabled Domains**: test-generation, test-execution, coverage-analysis, quality-assessment, defect-intelligence, requirements-validation (+6 more) +- **Learning**: Enabled (transformer embeddings) +- **Max Concurrent Agents**: 15 +- **Background Workers**: pattern-consolidator, routing-accuracy-monitor, coverage-gap-scanner, flaky-test-detector + +### V3 QE Agents + +QE agents are in `.claude/agents/v3/`. 
Use with Task tool: + +```javascript +Task({ prompt: "Generate tests", subagent_type: "qe-test-architect", run_in_background: true }) +Task({ prompt: "Find coverage gaps", subagent_type: "qe-coverage-specialist", run_in_background: true }) +Task({ prompt: "Security audit", subagent_type: "qe-security-scanner", run_in_background: true }) +``` + +### Data Storage + +- **Memory Backend**: `.agentic-qe/memory.db` (SQLite) +- **Configuration**: `.agentic-qe/config.yaml` + +--- +*Generated by AQE v3 init - 2026-03-28T08:27:45.923Z* diff --git a/docs/hackathon-ideas-qe.md b/docs/hackathon-ideas-qe.md new file mode 100644 index 0000000000..00a054cdd7 --- /dev/null +++ b/docs/hackathon-ideas-qe.md @@ -0,0 +1,330 @@ +# Superplane Hackathon Ideas — Quality Engineering Focus + +Six Thinking Hats analysis of QE-focused hackathon projects leveraging Agentic QE v3. +Scoped for a **3-hour hackathon timeframe**. + +--- + +## QE Arsenal Available + +| Category | Count | Highlights | +|----------|-------|------------| +| QE Agents | 60 | queen-coordinator, test-architect, coverage-specialist, quality-gate, flaky-hunter, chaos-engineer, etc. | +| QE Skills | 57 | qe-test-generation, qe-coverage-analysis, qe-quality-assessment, strict-tdd, coverage-guard, etc. | +| QCSD Phases | 5 | Ideation, Refinement, Development, CI/CD, Production swarms | +| MCP Tools | 67 | fleet_init, test_generate_enhanced, coverage_analyze_sublinear, quality_assess, security_scan_comprehensive | +| Domains | 12 | test-generation, coverage-analysis, quality-assessment, defect-intelligence, security-compliance, chaos-resilience, etc. 
| +| Learning | 150K+ patterns | ReasoningBank, HNSW vector search, pattern promotion, experience replay | + +--- + +## Six Thinking Hats Analysis + +### White Hat — Facts + +**Superplane's current testing state:** +- Go backend has tests (`make test`) and E2E tests (`make test.e2e`) using Playwright +- Frontend uses Vitest, has Storybook 9 for component stories +- No visible quality gates in CI/CD pipeline +- No coverage thresholds enforced +- No automated test generation integrated into the workflow +- Canvas workflows have no built-in validation or testing framework +- 200+ database migrations with no migration test suite +- 45+ integrations with no contract tests between them +- AI agent (PydanticAI) has an `evals/` directory — evaluation framework exists but is early +- Expression engine (`expr-lang/expr`) has no fuzz testing +- RBAC (Casbin) policies have no automated verification + +**AQE capabilities ready to use:** +- `qe-test-architect` can generate tests for Go and TypeScript +- `qe-coverage-specialist` provides O(log n) coverage gap detection +- `qe-quality-gate` can enforce pass/fail thresholds +- `qe-contract-validator` can validate API contracts +- `qe-chaos-engineer` can inject faults +- `qe-flaky-hunter` can detect unreliable tests +- `qe-security-scanner` does SAST/DAST scanning +- QCSD swarms provide phase-based quality workflows + +### Red Hat — Gut Feelings + +- **Excited:** Superplane has no quality gates — adding them would be transformative and immediately valuable +- **Feeling:** The most impactful QE project would make Superplane's own CI/CD pipeline significantly better +- **Intuition:** Contract testing between the 45+ integrations is a gold mine — each integration talks to external APIs with no verification +- **Anxious:** 3 hours is tight — must pick something that shows results fast, not infrastructure setup +- **Strong sense:** The Canvas workflow linting/validation idea from the previous analysis crosses into QE territory nicely +- 
**Gut:** Demo should show red/green — failing quality gate turning green after fixes + +### Black Hat — Risks + +| Risk | Impact | Mitigation | +|------|--------|------------| +| 3 hours is very tight | High | Pick ideas that produce visible results in <1 hour | +| Go test generation may need Go expertise | Medium | Focus on TypeScript/frontend tests where Vitest is already set up | +| AQE fleet_init may take time | Low | Initialize once at start, reuse across all work | +| Coverage tools need actual test execution | Medium | Use existing test suites, don't build from scratch | +| Quality gate enforcement needs CI/CD access | Medium | Demo locally with CLI, document CI integration | +| gRPC proto contract testing is complex | High | Focus on REST/HTTP API contracts instead | + +### Yellow Hat — Strengths & Opportunities + +**What makes QE projects ideal for this hackathon:** +- Superplane is a workflow automation platform — quality gates for workflows are a natural feature +- AQE agents can generate tests autonomously — show AI writing tests for Superplane's own code +- The 45+ integrations each follow a pattern — one contract test template scales to all +- Existing Vitest setup means frontend tests can run immediately +- QCSD framework provides a structured narrative for the demo +- Quality gates would make Superplane more enterprise-ready — directly valuable to the team +- AQE's learning system can show pattern evolution during the hackathon itself + +### Green Hat — Creative Ideas (3-Hour Scope) + +--- + +## Top 8 QE Hackathon Ideas + +### 1. 
Quality Gates for Canvas Workflows + +**Add quality gate validation that runs before a canvas is published, catching errors before they hit production.** + +What to build: +- Pre-publish validation hook that analyzes a canvas before it goes live +- Checks: all nodes configured, no orphan nodes, no cycles in non-loop paths, required integrations connected, approval gates on destructive actions +- Severity levels: Error (blocks publish), Warning (allows with acknowledgment), Info +- UI: Badge on canvas showing gate status (red/yellow/green) +- CLI: `aqe quality assess --scope canvas` produces SARIF report + +Leverage: +- `qe-quality-gate` agent for gate logic +- `qe-quality-assessment` skill for scoring +- Existing canvas model API to fetch workflow structure + +Why 3 hours works: Canvas structure is available via API. Validation is pure logic — no external dependencies needed. UI badge is a small React change. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Low | High | qe-quality-gate, qe-risk-assessor | + +--- + +### 2. AI Test Generation for Superplane's Own Codebase + +**Use AQE agents to generate a test suite for an undertested part of Superplane, demonstrating AI-powered QE in action.** + +What to build: +- Pick a module (e.g., `pkg/components/`, `pkg/exprruntime/`, or `web_src/src/ui/`) +- Run `qe-test-architect` to analyze code and generate tests +- Run `qe-coverage-specialist` to find gaps before and after +- Show coverage improvement: before (X%) -> after (Y%) +- Generate a coverage report with risk-weighted gap analysis + +Leverage: +- `test_generate_enhanced` MCP tool +- `coverage_analyze_sublinear` MCP tool +- `qe-gap-detector` for finding what to test +- Vitest (frontend) or Go test (backend) for execution + +Why 3 hours works: AQE generates tests automatically. Pick a small, self-contained module. The "before/after" demo tells a clear story. 
+ +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Low-Medium | High | qe-test-architect, qe-coverage-specialist, qe-gap-detector | + +--- + +### 3. Integration Contract Test Suite + +**Create contract tests for Superplane's top integrations ensuring API compatibility doesn't break silently.** + +What to build: +- Pick 3-5 integrations (GitHub, Slack, PagerDuty, Datadog, AWS) +- For each: capture the expected request/response schema from the integration code +- Generate consumer-driven contract tests using `qe-contract-validator` +- Validate that integration components send correct payloads and handle responses properly +- Show: "GitHub changed their API response? This contract test catches it before your workflow breaks" + +Leverage: +- `qe-contract-validator` agent +- `contract-testing` skill (Pact patterns) +- `api-testing-patterns` skill +- Integration source code in `pkg/integrations/` + +Why 3 hours works: Integration code follows a consistent pattern. Schema extraction is mechanical. 3-5 integrations is achievable. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Medium | High | qe-contract-validator, qe-integration-tester | + +--- + +### 4. 
QCSD Pipeline Demo — Full Quality Lifecycle + +**Demonstrate the complete QCSD (Quality-Completeness-Security-Deployment) lifecycle on a Superplane feature.** + +What to build: +- Pick a real feature area (e.g., the approval component or webhook trigger) +- **Ideation phase**: Run `qcsd-ideation-swarm` to generate quality criteria using HTSM v6.3 +- **Refinement phase**: Run `qcsd-refinement-swarm` to produce BDD scenarios and SFDIPOT analysis +- **Development phase**: Run `qcsd-development-swarm` to check TDD adherence, complexity, and coverage +- **CI/CD phase**: Run `qcsd-cicd-swarm` to enforce quality gates and assess deployment readiness +- **Production phase**: Run `qcsd-production-swarm` to define monitoring and feedback loops +- Output: Complete quality dossier for the feature + +Leverage: +- All 5 QCSD swarm skills +- Cross-phase feedback loops (strategic, tactical, operational, quality-criteria, learning) +- `qe-quality-criteria-recommender` for HTSM analysis +- `qe-product-factors-assessor` for SFDIPOT + +Why 3 hours works: Each QCSD phase takes ~30 minutes. The framework is already built — you're applying it, not building it. The demo tells a compelling narrative. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Medium | Very High | All QCSD swarms, qe-quality-criteria-recommender, qe-product-factors-assessor | + +--- + +### 5. 
Flaky Test Hunter & Auto-Stabilizer + +**Detect flaky tests in Superplane's test suite, analyze root causes, and auto-fix them.** + +What to build: +- Run Superplane's test suite multiple times to identify non-deterministic failures +- Use `qe-flaky-hunter` to classify flaky patterns (timing, ordering, shared state, resource contention) +- Use `qe-root-cause-analyzer` to diagnose each flaky test +- Use `qe-retry-handler` to implement intelligent retry with adaptive backoff +- Generate a report: X flaky tests found, Y root causes identified, Z auto-fixed +- PR with stabilization fixes + +Leverage: +- `qe-flaky-hunter` agent +- `qe-root-cause-analyzer` agent +- `qe-retry-handler` agent +- `test-failure-investigator` skill +- `qe-test-execution` skill for parallel runs + +Why 3 hours works: Test suite already exists. Running it multiple times is mechanical. Flaky test detection produces immediate, tangible results. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Medium | High | qe-flaky-hunter, qe-root-cause-analyzer, qe-retry-handler | + +--- + +### 6. 
Security Quality Gate for Integrations + +**Scan Superplane's 45+ integrations for security vulnerabilities: hardcoded secrets, injection risks, insecure HTTP, missing auth validation.** + +What to build: +- Run `qe-security-scanner` across `pkg/integrations/` directory +- Check for: credentials in code, HTTP instead of HTTPS, missing input validation, SQL injection in queries, unvalidated webhook payloads +- Run `security_scan_comprehensive` MCP tool for SAST analysis +- Generate SARIF report compatible with GitHub Code Scanning +- Create a security scorecard: each integration gets a grade (A-F) +- Fix the critical findings and show before/after + +Leverage: +- `qe-security-scanner` agent +- `qe-security-auditor` agent +- `security-testing` skill +- `security_scan_comprehensive` MCP tool +- `pentest-validation` skill for exploit verification + +Why 3 hours works: Scanning is automated. The 45 integrations follow a pattern, so findings scale. SARIF output is a standard format. Scorecard is a compelling visual. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Low-Medium | High | qe-security-scanner, qe-security-auditor | + +--- + +### 7. 
Expression Engine Fuzz Testing & Property Tests + +**Fuzz test Superplane's expression runtime (`expr-lang/expr`) to find edge cases and crashes in workflow expressions.** + +What to build: +- Analyze `pkg/exprruntime/` to understand the expression language +- Use `qe-property-tester` to generate property-based tests (e.g., "any valid expression should not panic", "nested access on nil should return error not crash") +- Use `qe-mutation-tester` to verify existing tests catch real bugs +- Fuzz with random/malformed expressions: deeply nested, Unicode, injection attempts +- Report: X edge cases found, Y potential crashes, Z security issues + +Leverage: +- `qe-property-tester` agent +- `qe-mutation-tester` agent +- `qe-test-architect` for test generation +- Go's built-in fuzzing (`go test -fuzz`) + +Why 3 hours works: Expression engines are perfect fuzz targets — small input surface, clear correctness criteria. Property tests are auto-generated. Go has native fuzz support. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Medium | High | qe-property-tester, qe-mutation-tester, qe-test-architect | + +--- + +### 8. 
Accessibility Audit of Canvas UI + +**Run a comprehensive accessibility audit on the Canvas UI and fix critical WCAG violations.** + +What to build: +- Use `qe-accessibility-auditor` to scan the Canvas UI pages +- Run axe-core analysis on key pages: canvas editor, run history, integration settings, organization management +- Check: keyboard navigation, screen reader compatibility, color contrast, focus management, ARIA labels on React Flow nodes +- Generate WCAG 2.2 compliance report with severity ratings +- Fix top 5-10 critical violations (missing alt text, focus traps, contrast issues) +- Before/after screenshots showing improvements + +Leverage: +- `qe-accessibility-auditor` agent +- `qe-visual-accessibility` skill +- `a11y-ally` skill +- `accessibility-testing` skill +- Existing Storybook for component-level testing + +Why 3 hours works: axe-core scanning is fast. Canvas UI is a single-page app — limited surface area. WCAG fixes are often small CSS/ARIA changes. Before/after demos well. + +| Effort | Demo Impact | AQE Agents Used | +|--------|-------------|-----------------| +| Low-Medium | Medium-High | qe-accessibility-auditor, qe-visual-tester | + +--- + +## Blue Hat — Action Plan + +### Top 3 Picks for a 3-Hour Hackathon + +| Rank | Project | Why | Time Estimate | +|------|---------|-----|---------------| +| **1** | **Quality Gates for Canvas Workflows** (#1) | Directly extends the product. Pure logic + small UI. No external deps. Most relevant to Superplane team. | ~2.5 hours | +| **2** | **AI Test Generation for Superplane** (#2) | Shows AQE in action on real code. "Before/after coverage" is a compelling metric. Minimal setup. | ~2 hours | +| **3** | **QCSD Pipeline Demo** (#4) | Tells the best story. Shows a complete quality lifecycle. Each phase builds on the last. | ~3 hours | + +### Best "Impress the Judges" Pick +**Quality Gates for Canvas Workflows** (#1) — It's a real product feature that the Superplane team would actually ship. 
Shows you understand the product AND quality engineering. + +### Best "Technical Depth" Pick +**Expression Engine Fuzz Testing** (#7) — Finding real bugs with property-based testing and fuzzing is technically impressive and produces concrete "we found X crashes" results. + +### Best "Breadth of QE" Pick +**QCSD Pipeline Demo** (#4) — Showcases 5 phases, 10+ agents, cross-phase learning. Demonstrates the full power of agentic quality engineering. + +### Suggested 3-Hour Plan + +| Time | Activity | +|------|----------| +| 0:00-0:15 | Initialize AQE fleet (`fleet_init`), pick your project | +| 0:15-0:30 | Read the relevant Superplane source code | +| 0:30-2:00 | Build (90 minutes of focused implementation) | +| 2:00-2:30 | Run demos, capture screenshots/metrics | +| 2:30-2:45 | Polish: write 3-slide pitch with before/after | +| 2:45-3:00 | Present | + +### Combining Ideas + +These ideas compose well. If your team has 2-3 people: + +- **Person A:** Quality Gates for Canvas (#1) — product feature +- **Person B:** AI Test Generation (#2) — coverage improvement +- **Person C:** Security Scan (#6) — security scorecard + +Together: "We added quality gates, improved test coverage by X%, and found Y security issues across 45 integrations — in 3 hours." diff --git a/docs/hackathon-ideas.md b/docs/hackathon-ideas.md new file mode 100644 index 0000000000..b48724dafd --- /dev/null +++ b/docs/hackathon-ideas.md @@ -0,0 +1,240 @@ +# Superplane Hackathon Ideas — Novi Sad 2026 + +Six Thinking Hats analysis of hackathon project ideas for the Superplane platform. 
+ +## Platform Summary + +Superplane is an open-source DevOps control plane for event-based workflows: +- **Backend:** Go 1.25, gRPC + REST, PostgreSQL, RabbitMQ +- **Frontend:** React 19, TypeScript, React Flow (Canvas UI), Tailwind, shadcn +- **AI Agent:** Python 3.13, PydanticAI, Claude API (alpha) +- **45+ integrations:** GitHub, AWS, Slack, PagerDuty, Datadog, and more +- **Key gaps:** No native K8s integration, no workflow testing, no auto-error recovery, limited observability + +--- + +## Top 10 Ideas (Ranked by Feasibility x Impact x Demo-ability) + +### 1. Incident Copilot — AI-Powered First-5-Minutes Triage Agent + +**Build an autonomous incident response workflow that triggers on PagerDuty/Datadog alerts and uses AI to gather context, correlate signals, and propose actions.** + +- **Trigger:** PagerDuty `onIncident` or Datadog `onAlert` +- **Canvas flow:** Parallel fan-out to fetch recent deploys (GitHub), check dashboards (Datadog/Grafana), pull logs, check pod status (HTTP to K8s API) +- **AI node:** Claude component receives all context, produces structured triage summary +- **Output:** Posts evidence pack to Slack with severity assessment and recommended actions +- **Approval gate** before any remediation action + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Low-Medium | Very High | Canvas design, AI prompting | + +**Why it wins:** Directly on-theme. Uses existing integrations. Mostly canvas template + AI prompt engineering. No backend changes needed. + +--- + +### 2. 
NL2Workflow — Natural Language to Complete Canvas Generation + +**Type a sentence describing your workflow and get a fully wired canvas.** + +Example input: *"When a PR is merged to main, run tests, if they pass deploy to staging with a 10-minute canary, then promote to production with approval"* + +- Enhance the existing AI agent to produce complete canvas operations +- Leverage the pattern library + component catalog as context +- Generate canvas YAML, import into UI +- Interactive refinement: "Add a Slack notification if canary fails" + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium | Very High | Python, PydanticAI, prompt engineering | + +**Why it wins:** AI sidebar already exists but only does Q&A. Full generation is the natural next step. Jaw-dropping demo. + +--- + +### 3. Canvas Replay — Workflow Execution Debugger & Time-Travel UI + +**Visual execution replay: step through a workflow run node-by-node, seeing inputs/outputs/timing at each step.** + +- New UI panel showing execution timeline +- Click any node to see input payload, output, duration, errors +- "Play" button animates execution flow through the canvas +- Highlight bottlenecks (slow nodes in red) +- Compare two runs side-by-side (success vs failure) + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium | High | React, TypeScript, React Flow | + +**Why it wins:** Pure frontend work. Execution data already exists in the backend. Visually stunning demo. + +--- + +### 4. 
Workflow Test Runner — Test Framework for Canvases + +**Testing mode: define expected inputs/outputs for a canvas and run assertions without hitting real integrations.** + +- Mock mode for components (return predefined responses) +- Test definition: trigger event -> expected node execution order -> expected outputs +- "Test" button in Canvas UI runs the workflow in simulation +- Green/red indicators on each node (passed/failed) +- Coverage report: which paths were exercised + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium-High | High | Go (backend), React (frontend) | + +**Why it wins:** Fills a critical gap. Makes Superplane enterprise-ready. Shows deep product understanding. + +--- + +### 5. Kubernetes Operator Integration — Native K8s Triggers & Components + +**Add Kubernetes as a first-class integration.** + +- New integration in `pkg/integrations/kubernetes/` +- **Triggers:** `onPodCrashLoop`, `onDeploymentRollout`, `onHPAScale`, `onNodeNotReady` +- **Components:** `applyManifest`, `scaleDeployment`, `rollbackDeployment`, `getPodLogs` +- Uses K8s API via `client-go` + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium-High | High | Go, Kubernetes | + +**Why it wins:** K8s is THE missing integration. Every platform engineer wants this. + +--- + +### 6. 
Self-Healing Workflows — AI Error Recovery Agent + +**When a workflow node fails, an AI agent analyzes the error, suggests a fix, and can auto-retry with corrected parameters.** + +- Intercept node execution failures in the worker +- Pass error context + node config to Claude component +- AI proposes: retry with different params, skip node, alert human, or rollback +- Configurable autonomy level per canvas: "suggest only" / "auto-fix with approval" / "full auto" +- Audit log of all AI decisions + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium-High | Very High | Go (workers), AI prompting | + +**Why it wins:** Makes workflows resilient without manual intervention. Novel feature no competitor has. + +--- + +### 7. Integration Marketplace — Community Component Store + +**UI where users can browse, install, and publish custom components/integrations.** + +- Browse page with categories, search, popularity +- One-click install (downloads integration config) +- Publish: package a custom HTTP-based integration as a template +- Rating/review system +- Featured workflows section + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium | Medium-High | React, API design | + +**Why it wins:** Creates ecosystem/community value. Pure frontend + API work. + +--- + +### 8. GitOps for Workflows — Canvas-as-Code with Git Sync + +**Store canvas definitions in Git, sync bidirectionally, enable PR-based workflow changes with diff visualization.** + +- Export canvas to YAML in a Git repo (GitHub integration exists) +- Watch for YAML changes, auto-update canvas +- PR workflow: propose canvas change as YAML diff, visual diff in Superplane UI +- Branch-based canvas environments (staging vs production) + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium | High | Go, Git APIs, React | + +**Why it wins:** GitOps is how infrastructure teams already work. 
Bridges visual editing and code review. + +--- + +### 9. Workflow Analytics Dashboard — Execution Intelligence + +**Real-time dashboard: success rates, execution times, failure patterns, cost estimates, anomaly detection.** + +- Aggregate execution data from existing tables +- Charts: success/fail ratio, p50/p95 duration, failure heatmap by node +- Anomaly detection: flag unusually slow or failing runs +- Cost estimation: track API calls, compute time per workflow +- Weekly digest email + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Medium | Medium-High | React, SQL, charting | + +**Why it wins:** Addresses observability gap. Data already exists. Visual and data-rich demo. + +--- + +### 10. Workflow Linter — Static Analysis for Canvases + +**Analyze workflows for common mistakes before execution.** + +- Graph analysis: detect cycles, orphan nodes, missing connections +- Config validation: required fields empty, invalid expressions, deprecated components +- Security checks: secrets in plaintext, missing approval gates +- Performance hints: suggest parallelization, flag long chains +- Inline warnings on Canvas nodes (yellow/red badges) + +| Effort | Demo Impact | Skills Needed | +|--------|-------------|---------------| +| Low-Medium | Medium-High | Go or TypeScript, graph algorithms | + +**Why it wins:** Low complexity, high value. Quick to build and demo. Makes Superplane feel enterprise-grade. 
+ +--- + +## Six Thinking Hats Summary + +### White Hat (Facts) +- 45+ integrations, gRPC+REST API, React Flow canvas, PydanticAI agent (alpha) +- Key gaps: no K8s, no workflow testing, no auto-recovery, no anomaly detection + +### Red Hat (Gut Feelings) +- AI + Canvas combo will have the biggest wow-factor +- Incident response is emotionally compelling (everyone hates 3am pages) +- Projects that demo well in 5 minutes will win + +### Black Hat (Risks) +- One-day scope: overambitious projects won't finish +- Go backend requires Go expertise for deep changes +- Local dev setup may eat hours (Docker, Postgres, RabbitMQ) +- Canvas UI is a 228KB monolith, risky to modify deeply + +### Yellow Hat (Strengths) +- Integration registry is pluggable (clear pattern to follow) +- AI agent framework exists, extending it is incremental +- Expression engine enables powerful data transformation +- Strong API with auto-generated TypeScript client + +### Green Hat (Creative Ideas) +- See the 10 ideas above + +### Blue Hat (Action Plan) +- **Best overall pick:** Incident Copilot (#1) — low risk, high demo impact +- **Best AI pick:** NL2Workflow (#4) — extends existing agent +- **Best frontend pick:** Canvas Replay (#3) — pure React, data exists +- **Impress the core team:** Workflow Test Runner (#4) or Workflow Linter (#10) +- **Strong Go skills:** Kubernetes Integration (#5) or Self-Healing Workflows (#6) + +--- + +## Suggested Day Plan + +| Time | Activity | +|------|----------| +| 0:00-0:30 | Environment setup (`make dev.setup && make dev.start`) | +| 0:30-1:00 | Familiarize with Canvas UI, create a test workflow | +| 1:00-5:00 | Build your project (pick ONE from above) | +| 5:00-5:30 | Polish demo, write 3-slide pitch | +| 5:30-6:00 | Present | diff --git a/docs/hackathon-plan.md b/docs/hackathon-plan.md new file mode 100644 index 0000000000..49e0831081 --- /dev/null +++ b/docs/hackathon-plan.md @@ -0,0 +1,165 @@ +# Professorianci — SuperPlane Hackathon Plan + +**Team:** Dragan, 
Braca, Fedja
**Date:** March 28, 2026 — Novi Sad
**Project:** Incident Copilot + Workflow Quality Gates

---

## The Pitch (One Sentence)

We build an AI-powered Incident Copilot that auto-triages production alerts, AND a workflow quality gate that ensures the copilot (and every canvas) is safe before it goes live.

---

## What We Built

### Track A — Incident Copilot (Dragan)
An autonomous incident response workflow in SuperPlane Canvas:
- **Trigger:** PagerDuty `onIncident` (filtered to P1/P2 only)
- **Fan-out:** 3 parallel nodes fetch recent deploys (GitHub), metrics (Datadog), incident timeline (PagerDuty)
- **Merge:** Waits for all 3 data sources (2-minute timeout)
- **AI Triage:** Claude component receives all context, produces structured severity assessment with root cause hypotheses
- **Output:** Slack message with evidence pack to `#hackathon-demo` channel
- **Approval gate** before any remediation action
- **4 annotation widgets** explaining each stage

**File:** `templates/canvases/incident-copilot.yaml` (13 nodes, 10 edges)

### Track B — Workflow Quality Gate (Braca)
Static analysis engine that validates ANY canvas — implemented as both a Go backend package and TypeScript frontend linter with full parity:

**10 Lint Rules:**
| Rule | Severity | What it catches |
|------|----------|----------------|
| `duplicate-node-id` | error | Two nodes sharing the same ID |
| `duplicate-node-name` | warning | Ambiguous expression references |
| `invalid-edge` | error | Dangling refs, self-loops, duplicate edges, widget endpoints |
| `cycle-detected` | error | Circular dependencies in workflow graph (Kahn's algorithm) |
| `orphan-node` | warning | Nodes not reachable from any trigger (BFS) |
| `dead-end` | warning | Non-terminal nodes with no outgoing edges |
| `missing-approval-gate` | error | Destructive actions without upstream approval (reverse BFS) |
| `missing-required-config` | error/warn | Empty prompts, missing 
channels, single-input merges | +| `invalid-expression` | error/warn | Unbalanced `{{ }}`, references to non-existent nodes | +| `unreachable-branch` | info | Filter nodes with no default outgoing edge | + +**Quality Scoring:** +- Score 0-100 with letter grades A-F +- Per-category caps: errors max -60pts, warnings max -30pts, info max -10pts +- All 3 existing templates score Grade A + +**Integration Points:** +- **Pre-save quality gate** — logs quality issues on every canvas save (warn-only, never blocks) +- **REST API** — `POST /api/v1/canvases/{id}/lint` returns full lint result as JSON +- **Frontend badge** — green/red badge in canvas header with tooltip showing all issues and quality score +- **36 unit tests** including dogfood tests against all 3 existing templates + +**Files:** +- `pkg/linter/linter.go` — Go linter engine (9 rules, quality scoring) +- `pkg/linter/linter_test.go` — 36 tests, all passing +- `pkg/grpc/actions/canvases/lint_canvas.go` — REST API handler +- `pkg/grpc/actions/canvases/update_canvas_version.go` — pre-save quality gate +- `pkg/public/server.go` — route registration +- `web_src/src/utils/canvasLinter.ts` — TypeScript linter (full parity with Go) +- `web_src/src/ui/CanvasPage/Header.tsx` — quality gate badge UI + +### Track C — Demo & Glue (Fedja) +- 4 mock data files for realistic demo scenario +- Slack channel configured: `#hackathon-demo` (C0APV7H889F) +- Demo script with quality gates narrative + +**Files:** +- `docs/mock-incident.json` — PagerDuty incident payload (API Gateway 5xx spike) +- `docs/mock-github-release.json` — GitHub release v2.14.3 +- `docs/mock-datadog-metrics.json` — Error rate, latency, request count time series +- `docs/mock-pagerduty-logs.json` — Incident timeline log entries + +--- + +## Why This Wins + +1. **Incident Copilot** is directly on-theme (AI + automation + production systems) +2. **Quality Gate** makes SuperPlane enterprise-ready — real product value +3. 
Together they tell one story: "We built the feature AND the safety net" +4. Every DevOps person relates to 3am incident pages +5. Demo is visual and compelling — canvas workflow + Slack output + red/green quality badge +6. Quality scoring (A-F grades) makes canvas health instantly readable +7. Full Go + TypeScript parity means the badge is always accurate + +--- + +## Demo Script (5 minutes) + +### Slide 1 — The Problem (30 seconds) +"It's 3am. PagerDuty fires. Your engineer wakes up, spends 20 minutes across 5 dashboards gathering context before they even understand what's happening." + +### Live Demo — The Incident Copilot (90 seconds) +1. Show the canvas: "Here's our Incident Copilot — built entirely in SuperPlane's Canvas" +2. Walk through the flow: trigger, filter, parallel data collection, merge, AI triage, Slack output, approval gate +3. Point out the **green quality badge** in the header: "Score: 100/100, Grade A — this workflow is validated before it goes live" +4. Fire the webhook: `curl -X POST http://localhost:8000/api/v1/webhooks/{webhook-id} -H "Content-Type: application/json" -d @docs/mock-incident.json` +5. Watch nodes light up in real-time +6. Switch to Slack: show the evidence pack arriving with severity assessment +7. "47 seconds. From alert to actionable triage." + +### Live Demo — The Quality Gate (90 seconds) +1. "But how do you know this workflow is safe before it goes live?" +2. Show the green badge: "Quality Gate: A (100/100)" +3. **Break something:** Delete an edge — badge turns red immediately: "1 error — orphan node detected" +4. **Fix it:** Reconnect the edge — badge turns green again +5. **Break differently:** Remove the approval gate — badge shows: "Destructive action has no upstream approval gate" +6. **Show the API:** `curl -X POST http://localhost:8000/api/v1/canvases/{id}/lint` — show JSON output with quality score +7. "Every canvas gets a quality score. Errors are caught before they reach production." 
+ +### Live Demo — Deep Validation (30 seconds) +1. Add a node with an expression referencing a non-existent node — badge catches it +2. Create a cycle — badge catches it +3. "9 rules, from graph cycles to expression validation. The linter catches what humans miss." + +### Slide 2 — What We Built (30 seconds) +- Incident Copilot: AI triage in < 60 seconds vs 20 minutes manual +- Quality Gate: 9 lint rules, quality scoring (A-F), REST API, real-time badge +- Full Go + TypeScript parity — backend and frontend always agree +- 36 unit tests, all 3 existing templates pass with Grade A +- Zero backend changes needed for copilot, minimal changes for quality gate + +### Slide 3 — What's Next (30 seconds) +- Quality gate as a pre-publish hook (block publish when grade < C) +- Linter as a built-in SuperPlane feature +- Copilot templates for common incident types (database, network, deployment) +- Self-healing: AI suggests workflow fixes when linter finds issues +- Integration contract tests using the same quality gate framework + +--- + +## Pre-Hackathon Checklist + +- [x] All three: clone repo, run `make dev.setup && make dev.start` +- [x] Dragan: review Canvas API and available integration components +- [x] Braca: review canvas data model (how nodes/edges are stored) +- [x] Fedja: find a real PagerDuty/Datadog alert payload format for mock data +- [x] All: agree on a Slack channel for demo output (`#hackathon-demo` — C0APV7H889F) + +--- + +## Technical Stats + +| Metric | Value | +|--------|-------| +| Go lines written | ~1,200 (linter + API + tests) | +| TypeScript lines written | ~400 (frontend linter + badge) | +| YAML template lines | ~280 (incident copilot) | +| Mock data files | 4 JSON files | +| Lint rules | 9 (full Go/TS parity) | +| Unit tests | 36 (all passing) | +| Template quality scores | 100/A, 100/A, 95/A | +| Devil's advocate reviews | 2 rounds, 26 issues found and fixed | + +--- + +## Fallback Plan + +If anything goes wrong with the full Incident Copilot: 
+- Simplify to just 2 nodes: trigger -> AI triage -> Slack (skip the parallel fan-out) +- The quality gate stands on its own as a valuable feature regardless +- Worst case: quality gate demo + copilot design walkthrough still tells the story diff --git a/docs/hackathon-reference-braca.md b/docs/hackathon-reference-braca.md new file mode 100644 index 0000000000..f73ede0111 --- /dev/null +++ b/docs/hackathon-reference-braca.md @@ -0,0 +1,241 @@ +# Track B Reference — Workflow Linter / Quality Gate (Braca) + +## Existing Validation (What Superplane Already Does) + +There's already validation in `pkg/grpc/actions/canvases/serialization.go` (lines 197-326). The linter should go **beyond** this with deeper semantic checks. + +**Already validated (don't duplicate):** +- Unique node IDs +- Node names present +- Component/trigger references exist in registry +- Edge source/target IDs exist +- Widgets not used as edge source/target +- Cycle detection (`CheckForCycles`) +- Group widget validation (no nesting, no self-reference) +- Basic config validation against component schema + +## What the Linter Should Add + +### 1. Orphan Node Detection +Nodes not reachable from any trigger (no path from root). + +```go +// Find root nodes (triggers with no incoming edges) +func FindOrphanNodes(nodes []Node, edges []Edge) []Node { + // Build adjacency: reachable set from all triggers + triggers := findTriggerNodes(nodes) + reachable := bfs(triggers, edges) + + var orphans []Node + for _, n := range nodes { + if n.Type == "widget" { continue } // groups are OK + if !reachable[n.ID] { + orphans = append(orphans, n) + } + } + return orphans +} +``` + +### 2. Dead-End Detection +Nodes with no outgoing edges that aren't terminal (Slack, email, approval, etc.). + +### 3. 
Missing Approval Before Destructive Actions +Destructive components that should have an approval gate upstream: +- `pagerduty.resolveIncident` +- `pagerduty.escalateIncident` +- `github.deleteRelease` +- `github.createRelease` +- Any HTTP DELETE/PUT to production URLs +- SSH commands + +Check: walk the graph backwards from these nodes — is there an `approval` component in the path? + +### 4. Missing Required Configuration +Go beyond basic "field required" — check semantic requirements: +- Claude `textPrompt` with empty `prompt` field +- HTTP component with no `url` +- Slack `sendTextMessage` with no `channel` +- Merge component with only 1 incoming edge (pointless merge) + +### 5. Expression Syntax Validation +Validate expression strings without executing them: +- Balanced `{{ }}` delimiters +- Valid `$['Node Name']` references point to actual node names +- `root()`, `previous()` used correctly + +### 6. Unreachable Branches +After an `if` component, check that both true/false branches lead somewhere. + +## Canvas Data Model + +### Node Structure (`pkg/models/blueprint.go`) +```go +type Node struct { + ID string + Name string + Type string // "trigger", "component", "blueprint", "widget" + Ref NodeRef // exactly one of: Component, Blueprint, Trigger, Widget + Configuration map[string]any + Metadata map[string]any + Position Position // {X, Y} + IsCollapsed bool + IntegrationID *string + ErrorMessage *string + WarningMessage *string +} + +type NodeRef struct { + Component *ComponentRef // {Name: "http"} + Blueprint *BlueprintRef // {ID: "..."} + Trigger *TriggerRef // {Name: "pagerduty.onIncident"} + Widget *WidgetRef // {Name: "group"} +} +``` + +### Edge Structure +```go +type Edge struct { + SourceID string // upstream node ID + TargetID string // downstream node ID + Channel string // "default", "success", "fail", "approved", "rejected", etc. 
+} +``` + +### Canvas Version (where nodes/edges live) +```go +type CanvasVersion struct { + Nodes []Node + Edges []Edge + // ... metadata +} +``` + +### Accessing Canvas via API +``` +GET /api/v1/canvases/{id} -> Canvas with live version spec +GET /api/v1/canvases/{id}/spec -> Just the nodes and edges +``` + +**Proto:** `protos/canvases.proto` — `Canvas.Spec` contains `repeated Node nodes` and `repeated Edge edges` + +## Component Configuration Schema + +Each component defines its config via `Configuration() []configuration.Field`: + +```go +type Field struct { + Name string + Label string + Type string // "string", "number", "boolean", "select", "expression", "text", etc. + Required bool + Default any + Sensitive bool +} +``` + +**Existing validation:** `pkg/configuration/validation.go` → `ValidateConfiguration(fields, config)` + +The registry at `pkg/registry/registry.go` has all components: +```go +Registry.Components // map[string]core.Component +``` + +## Graph Traversal Helpers + +Already in `pkg/models/blueprint.go`: +```go +FindEdges(sourceID, channel string) []Edge // outgoing edges from node +FindRootNode() *Node // node with no incoming edges +``` + +## Linter Output Format + +Suggested structure: +```json +{ + "status": "fail", + "errors": [ + { + "severity": "error", + "rule": "orphan-node", + "nodeId": "abc123", + "nodeName": "Unused HTTP Call", + "message": "Node is not reachable from any trigger" + } + ], + "warnings": [ + { + "severity": "warning", + "rule": "missing-approval-gate", + "nodeId": "def456", + "nodeName": "Delete Release", + "message": "Destructive action 'github.deleteRelease' has no upstream approval gate" + } + ], + "info": [ + { + "severity": "info", + "rule": "single-input-merge", + "nodeId": "ghi789", + "nodeName": "Wait for all", + "message": "Merge node has only 1 incoming edge — consider removing" + } + ], + "summary": { + "total": 3, + "errors": 1, + "warnings": 1, + "info": 1 + } +} +``` + +## Implementation Options + +### 
Option A: Go Package (recommended) +Add `pkg/linter/linter.go` with: +```go +func LintCanvas(nodes []models.Node, edges []models.Edge, registry *registry.Registry) *LintResult +``` +- Can access component registry for config validation +- Can be called from gRPC action (new API endpoint) +- Can be wired into pre-publish hook + +### Option B: TypeScript (frontend-only) +Add `web_src/src/utils/canvasLinter.ts`: +- Operates on the React Flow node/edge data already in memory +- Shows results inline in Canvas UI immediately +- No backend changes needed +- BUT: no access to component config schema + +### Option C: Both +- Go backend for deep validation (config schema, expression parsing) +- TypeScript frontend for instant visual feedback (orphans, dead-ends) + +For 3 hours, **Option A or B alone is sufficient**. Pick based on comfort. + +## Key Source Files + +| File | What to look at | +|------|-----------------| +| `pkg/models/blueprint.go:121-167` | Node, Edge, NodeRef structs | +| `pkg/grpc/actions/canvases/serialization.go:197-326` | Existing validation to extend | +| `pkg/configuration/field.go` | Config field schema | +| `pkg/configuration/validation.go` | Config validation logic | +| `pkg/core/component.go:70` | Component interface (Configuration method) | +| `pkg/registry/registry.go` | Component registry | +| `pkg/components/approval/approval.go` | Approval component | +| `pkg/components/merge/merge.go` | Merge component | +| `protos/canvases.proto` | Canvas proto definition | +| `protos/components.proto` | Node/Edge proto definition | + +## "Eat Our Own Dogfood" Demo + +At 2:15, run the linter against Dragan's Incident Copilot canvas: +1. It should PASS (green) — copilot is well-formed +2. Remove an edge → run again → catches orphan node (red) +3. Remove the approval gate → run again → warns about missing approval before destructive action +4. Fix → green again + +This is the money shot for the demo. 
diff --git a/docs/hackathon-reference-dragan.md b/docs/hackathon-reference-dragan.md new file mode 100644 index 0000000000..fbe2035e4d --- /dev/null +++ b/docs/hackathon-reference-dragan.md @@ -0,0 +1,184 @@ +# Track A Reference — Incident Copilot (Dragan) + +## Canvas Flow Design + +``` +PagerDuty onIncident (trigger) + | + v +Filter (P1/P2 only) + | + +---> GitHub getRelease (latest deploy) + +---> HTTP GET (Datadog metrics API) + +---> PagerDuty listLogEntries (incident timeline) + | + v +Merge (wait for all 3) + | + v +Claude textPrompt (AI triage) + | + v +Slack sendTextMessage (evidence pack) + | + v +Approval (before remediation) +``` + +## Existing Templates to Reference + +Two incident templates already exist — use them as a starting point: + +- `templates/canvases/incident-data-collection.yaml` — PagerDuty trigger + parallel data fetch + AI summary + GitHub issue +- `templates/canvases/incident-router.yaml` — Slack mention + AI title generation + PagerDuty/GitHub parallel create + +## PagerDuty Trigger + +**Trigger name:** `pagerduty.onIncident` + +**Events:** `incident.triggered`, `incident.acknowledged`, `incident.resolved` + +**Config fields:** +- `service` (required): PagerDuty service to monitor +- `events` (required): which incident events to listen for +- `urgencies` (optional): filter by "high" or "low" + +**Example payload (what downstream nodes receive):** +```json +{ + "data": { + "incident": { + "id": "PGR0VU2", + "number": 2, + "title": "A little bump in the road", + "status": "triggered", + "urgency": "high", + "service": { "id": "PF9KMXH", "summary": "API Service" }, + "assignees": [{ "id": "PTUXL6G", "summary": "User 123" }], + "escalation_policy": { "id": "PUS0KTE", "summary": "Default" }, + "teams": [{ "id": "PFCVPS0", "summary": "Engineering" }], + "priority": { "id": "PSO75BM", "summary": "P1" } + }, + "agent": { "id": "PLH1HKV", "summary": "Tenex Engineer" } + } +} +``` + +## Filter Node (P1/P2 Only) + +**Expression:** +``` +$["Listen 
for incidents"].data.incident.priority.summary == "P1" || $["Listen for incidents"].data.incident.priority.summary == "P2" +``` + +## Claude AI Triage Node + +**Component:** `claude.textPrompt` + +**Config:** +``` +model: claude-3-5-sonnet-latest (or claude-opus-4-6) +maxTokens: 4096 +temperature: 0.3 +``` + +**System message:** +``` +You are an expert SRE incident triage assistant. Given incident details, recent deployments, metrics, and logs, produce a structured triage report with: + +1. SEVERITY ASSESSMENT (P1-P4 with justification) +2. LIKELY ROOT CAUSE (top 3 hypotheses ranked by probability) +3. AFFECTED SYSTEMS (services, endpoints, user segments) +4. RECOMMENDED ACTIONS (ordered by priority, with estimated impact) +5. ESCALATION RECOMMENDATION (who to page, what team) + +Be concise. Use bullet points. Include specific evidence for each claim. +``` + +**Prompt (using expressions):** +``` +INCIDENT: +Title: {{ $['Listen for incidents'].data.incident.title }} +Status: {{ $['Listen for incidents'].data.incident.status }} +Urgency: {{ $['Listen for incidents'].data.incident.urgency }} +Priority: {{ $['Listen for incidents'].data.incident.priority.summary }} +Service: {{ $['Listen for incidents'].data.incident.service.summary }} +Assigned to: {{ $['Listen for incidents'].data.incident.assignees }} + +RECENT DEPLOYMENT: +{{ $['Get latest release'].data }} + +METRICS: +{{ $['Fetch metrics'].data }} + +INCIDENT LOG: +{{ $['Get incident logs'].data }} +``` + +## Slack Output Node + +**Component:** `slack.sendTextMessage` + +**Config:** +- `channel`: your demo Slack channel +- `text` (using expressions): + +``` +:rotating_light: *INCIDENT TRIAGE — AUTO-GENERATED* + +*{{ $['Listen for incidents'].data.incident.title }}* +Priority: {{ $['Listen for incidents'].data.incident.priority.summary }} +Service: {{ $['Listen for incidents'].data.incident.service.summary }} + +--- + +{{ $['AI Triage'].data.text }} + +--- + +_Triage generated by SuperPlane Incident Copilot in < 
60 seconds_ +``` + +## Approval Gate + +**Component:** `approval` + +**Config:** +```json +{ + "items": [ + { "type": "anyone" } + ] +} +``` + +Output channels: `approved` / `rejected` + +## Expression Syntax Quick Reference + +| Pattern | Meaning | +|---------|---------| +| `$['Node Name'].data.field` | Access upstream node output | +| `root().data` | Access trigger event data | +| `previous().data` | Access immediate previous node | +| `{{ expression }}` | Template interpolation in text fields | + +## Key Source Files + +| File | What to look at | +|------|-----------------| +| `templates/canvases/incident-data-collection.yaml` | Working incident template | +| `templates/canvases/incident-router.yaml` | Working routing template | +| `pkg/integrations/pagerduty/example_data_on_incident.json` | Full payload example | +| `pkg/integrations/claude/claude.go` | Claude component config | +| `pkg/integrations/slack/send_text_message.go` | Slack message component | +| `pkg/components/merge/merge.go` | Fan-in merge component | +| `pkg/components/approval/approval.go` | Approval gate | + +## Fallback + +If parallel fan-out + merge is too complex: +``` +PagerDuty trigger -> Claude textPrompt (just pass raw incident data) -> Slack message +``` +This 3-node flow works and still demos the core value. diff --git a/docs/hackathon-reference-fedja.md b/docs/hackathon-reference-fedja.md new file mode 100644 index 0000000000..ddc7bfd2a8 --- /dev/null +++ b/docs/hackathon-reference-fedja.md @@ -0,0 +1,259 @@ +# Track C Reference — Demo & Glue (Fedja) + +## Mock PagerDuty Incident Payload + +Use this to trigger the Incident Copilot without a real PagerDuty account. +Send as POST to the canvas webhook URL. 
+ +```json +{ + "event": { + "id": "01DEN4HPBQAAAG05V5QQYBRZMF", + "event_type": "incident.triggered", + "resource_type": "incident", + "occurred_at": "2026-03-28T14:30:00.000Z", + "agent": { + "html_url": "https://acme.pagerduty.com/users/PLH1HKV", + "id": "PLH1HKV", + "self": "https://api.pagerduty.com/users/PLH1HKV", + "summary": "Monitoring Bot", + "type": "user_reference" + }, + "data": { + "id": "PGR0VU2", + "type": "incident", + "self": "https://api.pagerduty.com/incidents/PGR0VU2", + "html_url": "https://acme.pagerduty.com/incidents/PGR0VU2", + "number": 42, + "status": "triggered", + "incident_key": "hackathon-demo-incident-001", + "created_at": "2026-03-28T14:30:00Z", + "title": "API Gateway: 5xx error rate spike to 15% on /api/v1/orders", + "urgency": "high", + "service": { + "html_url": "https://acme.pagerduty.com/services/PF9KMXH", + "id": "PF9KMXH", + "self": "https://api.pagerduty.com/services/PF9KMXH", + "summary": "API Gateway (Production)", + "type": "service_reference" + }, + "assignees": [ + { + "html_url": "https://acme.pagerduty.com/users/PTUXL6G", + "id": "PTUXL6G", + "self": "https://api.pagerduty.com/users/PTUXL6G", + "summary": "Dragan Petrovic (On-Call SRE)", + "type": "user_reference" + } + ], + "escalation_policy": { + "html_url": "https://acme.pagerduty.com/escalation_policies/PUS0KTE", + "id": "PUS0KTE", + "self": "https://api.pagerduty.com/escalation_policies/PUS0KTE", + "summary": "Production - Critical", + "type": "escalation_policy_reference" + }, + "teams": [ + { + "html_url": "https://acme.pagerduty.com/teams/PFCVPS0", + "id": "PFCVPS0", + "self": "https://api.pagerduty.com/teams/PFCVPS0", + "summary": "Platform Engineering", + "type": "team_reference" + } + ], + "priority": { + "html_url": "https://acme.pagerduty.com/priorities/PSO75BM", + "id": "PSO75BM", + "self": "https://api.pagerduty.com/priorities/PSO75BM", + "summary": "P1", + "type": "priority_reference" + }, + "conference_bridge": { + "conference_number": "+1 
555-123-4567,,987654321#", + "conference_url": "https://meet.google.com/abc-defg-hij" + }, + "body": { + "type": "incident_body", + "details": "5xx error rate on API Gateway spiked from 0.1% to 15.3% at 14:28 UTC. Affects /api/v1/orders endpoint. 1,247 users impacted in last 2 minutes. Correlated with deployment deploy-api-v2.14.3 at 14:25 UTC." + } + } + } +} +``` + +## Mock GitHub Release (Latest Deploy) + +If using HTTP component to simulate GitHub data, return this: + +```json +{ + "id": 12345678, + "tag_name": "v2.14.3", + "name": "Release v2.14.3 - Order Service Refactor", + "body": "## Changes\n- Refactored order validation logic\n- Migrated to new payment gateway client\n- Updated database connection pooling\n\n## Authors\n- @braca (order validation)\n- @fedja (payment gateway)\n\n## Risk: Medium\nDatabase connection pool size changed from 20 to 50", + "draft": false, + "prerelease": false, + "created_at": "2026-03-28T14:25:00Z", + "published_at": "2026-03-28T14:25:30Z", + "author": { + "login": "braca", + "id": 87654321 + } +} +``` + +## Mock Datadog Metrics Response + +For the HTTP component fetching Datadog metrics: + +```json +{ + "series": [ + { + "metric": "api.gateway.error_rate_5xx", + "points": [ + [1711633200, 0.1], + [1711633260, 0.3], + [1711633320, 2.1], + [1711633380, 8.7], + [1711633440, 15.3], + [1711633500, 14.8] + ], + "tags": ["service:api-gateway", "env:production"] + }, + { + "metric": "api.gateway.latency_p99", + "points": [ + [1711633200, 120], + [1711633260, 145], + [1711633320, 890], + [1711633380, 2340], + [1711633440, 4500], + [1711633500, 4200] + ], + "tags": ["service:api-gateway", "env:production"] + } + ], + "status": "ok", + "query": "avg:api.gateway.error_rate_5xx{env:production} by {service}" +} +``` + +## Mock PagerDuty Log Entries + +```json +{ + "log_entries": [ + { + "type": "trigger_log_entry", + "created_at": "2026-03-28T14:30:00Z", + "summary": "Triggered by Datadog monitor: API 5xx Error Rate > 5%" + }, + { + 
"type": "notify_log_entry", + "created_at": "2026-03-28T14:30:05Z", + "summary": "Notified Dragan Petrovic via push notification" + }, + { + "type": "annotate_log_entry", + "created_at": "2026-03-28T14:30:10Z", + "summary": "Correlated with deploy-api-v2.14.3 (14:25 UTC)" + } + ] +} +``` + +## Slack Evidence Pack Template + +What the final Slack message should look like: + +``` +:rotating_light: *INCIDENT TRIAGE — AUTO-GENERATED* + +*API Gateway: 5xx error rate spike to 15% on /api/v1/orders* +Priority: P1 | Service: API Gateway (Production) +Assignee: Dragan Petrovic (On-Call SRE) + +--- + +*SEVERITY: P1 — Critical* +Customer-facing order flow is down for ~1,200 users. + +*LIKELY ROOT CAUSE:* +1. (85%) Deploy v2.14.3 changed DB connection pool 20->50, likely exhausting DB connections +2. (10%) Payment gateway client migration introduced timeout regression +3. (5%) Unrelated infrastructure issue + +*AFFECTED SYSTEMS:* +- API Gateway /api/v1/orders endpoint +- Order Service (downstream) +- ~1,247 active users in checkout flow + +*RECOMMENDED ACTIONS:* +1. :arrow_right: Rollback deploy v2.14.3 immediately (ETA: 3 min) +2. Check DB connection count: `SELECT count(*) FROM pg_stat_activity` +3. Monitor error rate after rollback for 5 min +4. If not resolved, escalate to Database Team + +*ESCALATION:* +- Current: Platform Engineering (Dragan) +- Next: Database Team (@db-oncall) if rollback doesn't resolve + +--- +:clock1: Triage generated by SuperPlane Incident Copilot in 47 seconds +:link: +``` + +## Demo Script — Detailed + +### Setup (before demo starts) +1. Have the canvas open in browser, zoomed to show full flow +2. Have Slack channel open in a second tab/window +3. Have a `curl` command ready to fire the mock webhook + +### Act 1: The Problem (30 seconds) +"It's 3am. PagerDuty fires. Your engineer opens 5 tabs: PagerDuty, Datadog, GitHub, the runbook, Slack. Spends 20 minutes gathering context before understanding the problem. We fixed that." 
+### Act 2: The Copilot (90 seconds) +1. Show the canvas: "Here's our Incident Copilot — built entirely in SuperPlane's Canvas" +2. Walk through the flow: trigger, parallel data collection, AI triage, Slack output +3. Fire the webhook: `curl -X POST http://localhost:8000/api/v1/webhooks/{webhook-id} -H "Content-Type: application/json" -d @docs/mock-incident.json` +4. Watch nodes light up in real-time (canvas execution visualization) +5. Switch to Slack: show the evidence pack arriving +6. "47 seconds. From alert to actionable triage." + +### Act 3: The Safety Net (60 seconds) +1. "But how do you know this workflow is safe before it goes live?" +2. Run the linter: show green pass +3. Delete an edge in the canvas +4. Run linter again: red fail — "Orphan node detected" +5. Remove the approval gate +6. Run linter again: warning — "Destructive action without approval" +7. "The linter catches mistakes before they reach production." + +### Act 4: What's Next (30 seconds) +- Linter as a built-in pre-publish hook +- Template library for common incident types +- Self-healing: AI suggests fixes when linter finds issues + +## Screenshot Checklist + +Capture these during the build (2:15-2:30): +- [ ] Full canvas view with all nodes connected +- [ ] Canvas with nodes executing (green highlights) +- [ ] Slack evidence pack message +- [ ] Linter output: passing (green) +- [ ] Linter output: failing (red) +- [ ] Before/after side-by-side + +## Curl Command for Demo + +Save this as `docs/mock-incident.json` and use: +```bash +curl -X POST http://localhost:8000/api/v1/webhooks/{webhook-id} \ + -H "Content-Type: application/json" \ + -d @docs/mock-incident.json +``` + +(Get the webhook-id from the canvas trigger configuration after setup) diff --git a/docs/mock-datadog-metrics.json b/docs/mock-datadog-metrics.json new file mode 100644 index 0000000000..55675c10b3 --- /dev/null +++ b/docs/mock-datadog-metrics.json @@ -0,0 +1,48 @@ +{ + "status": "ok", + "series": [ + { + "metric": "api.gateway.error_rate_5xx", + "display_name": "5xx Error Rate (%)", 
"points": [ + [1774973400, 0.1], + [1774973460, 0.3], + [1774973520, 2.1], + [1774973580, 8.7], + [1774973640, 15.3], + [1774973700, 14.8] + ], + "tags": ["service:api-gateway", "env:production"], + "unit": "percent" + }, + { + "metric": "api.gateway.latency_p99", + "display_name": "P99 Latency (ms)", + "points": [ + [1774973400, 120], + [1774973460, 145], + [1774973520, 890], + [1774973580, 2340], + [1774973640, 4500], + [1774973700, 4200] + ], + "tags": ["service:api-gateway", "env:production"], + "unit": "millisecond" + }, + { + "metric": "api.gateway.request_count", + "display_name": "Request Count", + "points": [ + [1774973400, 15234], + [1774973460, 14890], + [1774973520, 12456], + [1774973580, 8934], + [1774973640, 6721], + [1774973700, 7102] + ], + "tags": ["service:api-gateway", "env:production"], + "unit": "request" + } + ], + "query": "avg:api.gateway.error_rate_5xx{service:api-gateway,env:production}" +} diff --git a/docs/mock-github-release.json b/docs/mock-github-release.json new file mode 100644 index 0000000000..5b5f4caee6 --- /dev/null +++ b/docs/mock-github-release.json @@ -0,0 +1,17 @@ +{ + "id": 12345678, + "tag_name": "v2.14.3", + "name": "Release v2.14.3 - Order Service Refactor", + "body": "## Changes\n- Refactored order validation logic\n- Migrated to new payment gateway client\n- Updated database connection pooling\n\n## Authors\n- @braca (order validation)\n- @fedja (payment gateway)\n\n## Risk: Medium\nDatabase connection pool size changed from 20 to 50", + "draft": false, + "prerelease": false, + "created_at": "2026-03-28T14:25:00Z", + "published_at": "2026-03-28T14:25:30Z", + "author": { + "login": "braca", + "id": 87654321 + }, + "html_url": "https://github.com/acme-corp/api-service/releases/tag/v2.14.3", + "tarball_url": "https://api.github.com/repos/acme-corp/api-service/tarball/v2.14.3", + "zipball_url": "https://api.github.com/repos/acme-corp/api-service/zipball/v2.14.3" +} diff --git a/docs/mock-incident.json 
b/docs/mock-incident.json new file mode 100644 index 0000000000..b9a76fb12f --- /dev/null +++ b/docs/mock-incident.json @@ -0,0 +1,74 @@ +{ + "event": { + "id": "01DEN4HPBQAAAG05V5QQYBRZMF", + "event_type": "incident.triggered", + "resource_type": "incident", + "occurred_at": "2026-03-28T14:30:00.000Z", + "agent": { + "html_url": "https://acme.pagerduty.com/users/PLH1HKV", + "id": "PLH1HKV", + "self": "https://api.pagerduty.com/users/PLH1HKV", + "summary": "Monitoring Bot", + "type": "user_reference" + }, + "data": { + "id": "PGR0VU2", + "type": "incident", + "self": "https://api.pagerduty.com/incidents/PGR0VU2", + "html_url": "https://acme.pagerduty.com/incidents/PGR0VU2", + "number": 42, + "status": "triggered", + "incident_key": "hackathon-demo-incident-001", + "created_at": "2026-03-28T14:30:00Z", + "title": "API Gateway: 5xx error rate spike to 15% on /api/v1/orders", + "urgency": "high", + "service": { + "html_url": "https://acme.pagerduty.com/services/PF9KMXH", + "id": "PF9KMXH", + "self": "https://api.pagerduty.com/services/PF9KMXH", + "summary": "API Gateway (Production)", + "type": "service_reference" + }, + "assignees": [ + { + "html_url": "https://acme.pagerduty.com/users/PTUXL6G", + "id": "PTUXL6G", + "self": "https://api.pagerduty.com/users/PTUXL6G", + "summary": "Dragan Petrovic (On-Call SRE)", + "type": "user_reference" + } + ], + "escalation_policy": { + "html_url": "https://acme.pagerduty.com/escalation_policies/PUS0KTE", + "id": "PUS0KTE", + "self": "https://api.pagerduty.com/escalation_policies/PUS0KTE", + "summary": "Production - Critical", + "type": "escalation_policy_reference" + }, + "teams": [ + { + "html_url": "https://acme.pagerduty.com/teams/PFCVPS0", + "id": "PFCVPS0", + "self": "https://api.pagerduty.com/teams/PFCVPS0", + "summary": "Platform Engineering", + "type": "team_reference" + } + ], + "priority": { + "html_url": "https://acme.pagerduty.com/priorities/PSO75BM", + "id": "PSO75BM", + "self": 
"https://api.pagerduty.com/priorities/PSO75BM", + "summary": "P1", + "type": "priority_reference" + }, + "conference_bridge": { + "conference_number": "+1 555-123-4567,,987654321#", + "conference_url": "https://meet.google.com/abc-defg-hij" + }, + "body": { + "type": "incident_body", + "details": "5xx error rate on API Gateway spiked from 0.1% to 15.3% at 14:28 UTC. Affects /api/v1/orders endpoint. 1,247 users impacted in last 2 minutes. Correlated with deployment deploy-api-v2.14.3 at 14:25 UTC." + } + } + } +} diff --git a/docs/mock-pagerduty-logs.json b/docs/mock-pagerduty-logs.json new file mode 100644 index 0000000000..7848a1190f --- /dev/null +++ b/docs/mock-pagerduty-logs.json @@ -0,0 +1,40 @@ +{ + "log_entries": [ + { + "id": "R1YCD0YVSA", + "type": "trigger_log_entry", + "created_at": "2026-03-28T14:30:00Z", + "summary": "Triggered by Datadog monitor: API 5xx Error Rate > 5%", + "agent": { + "type": "service_reference", + "summary": "Datadog Integration" + }, + "channel": { + "type": "auto", + "details": "Monitor: api-gateway-5xx-rate, Threshold: 5%, Current: 15.3%" + } + }, + { + "id": "R2BDE1ZWTA", + "type": "notify_log_entry", + "created_at": "2026-03-28T14:30:05Z", + "summary": "Notified Dragan Petrovic via push notification", + "agent": { + "type": "user_reference", + "summary": "PagerDuty" + } + }, + { + "id": "R3CEF2AXUB", + "type": "annotate_log_entry", + "created_at": "2026-03-28T14:30:10Z", + "summary": "Correlated with deploy-api-v2.14.3 (14:25 UTC)", + "agent": { + "type": "service_reference", + "summary": "Deploy Tracker" + } + } + ], + "total": 3, + "more": false +} diff --git a/docs/qe-reports-March-28/00-EXECUTIVE-SUMMARY.md b/docs/qe-reports-March-28/00-EXECUTIVE-SUMMARY.md new file mode 100644 index 0000000000..a31ac63e58 --- /dev/null +++ b/docs/qe-reports-March-28/00-EXECUTIVE-SUMMARY.md @@ -0,0 +1,144 @@ +# SuperPlane QE Analysis - Executive Summary + +**Date:** March 28, 2026 +**Methodology:** QE Queen Swarm — 6 specialized agents + 
MCP fleet orchestration +**Fleet:** fleet-bf88d6ec | Topology: Hierarchical | Agents: 15 max +**Scope:** Full-stack — Go backend (944 files, 407K lines) + React frontend (720 files, 138K lines) +**Total Functions Analyzed:** 6,931 + +--- + +## Overall Quality Gate: FAILED (49/100) + +| Metric | Score | Status | +|--------|-------|--------| +| Overall Quality | 49.0/100 | FAIL | +| Cyclomatic Complexity | 30.95 avg | CRITICAL (threshold: 15) | +| Maintainability | 57.46/100 | WARN (threshold: 65) | +| Security Score | 85/100 | GOOD | +| Line Coverage (frontend) | 79.1% avg | OK | +| Branch Coverage (frontend) | 99.2% avg | GOOD | +| Function Coverage (frontend) | 20.8% avg | CRITICAL | + +--- + +## Cross-Report Findings Summary + +### STOP-THE-LINE Issues (Fix Before Next Release) + +| # | Finding | Source Report | Severity | +|---|---------|-------------|----------| +| 1 | **WebSocket Hub deadlock** — `BroadcastToWorkflow` holds RLock, calls `unregisterClient` needing WLock. Full system deadlock when client buffer fills. | Performance | CRITICAL | +| 2 | **WebSocket origin check disabled** — `CheckOrigin: func(r) { return true }`. Cross-Site WebSocket Hijacking vulnerability. | Security | CRITICAL | +| 3 | **Dev auth bypass routes** — `APP_ENV=development` completely bypasses authentication with hardcoded mock user. | Security | CRITICAL | +| 4 | **Unbounded polling queries** — `ListPendingNodeExecutions`, `ListPendingCanvasEvents`, `ListNodeRequests` fetch ALL rows with no LIMIT. OOM risk. | Performance | CRITICAL | +| 5 | **6,589-line god component** — `workflowv2/index.tsx` with 179 hooks, 1,121 cyclomatic complexity, 238 commits. Every state change re-evaluates entire tree. 
| Complexity | CRITICAL | + +### High Priority (Within Sprint) + +| # | Finding | Source | Severity | +|---|---------|--------|----------| +| 6 | NoOpEncryptor available in production (`NO_ENCRYPTION=yes`) | Security | HIGH | +| 7 | No password complexity validation (empty check only) | Security | HIGH | +| 8 | No rate limiting on password login | Security | HIGH | +| 9 | Cookie Secure flag depends on TLS termination (broken behind proxy) | Security | HIGH | +| 10 | No security headers (CSP, HSTS, X-Frame-Options) | Security | HIGH | +| 11 | N+1 queries in `ListCanvases` -> `SerializeCanvas` (100+ queries) | Performance | HIGH | +| 12 | Default DB pool size of 5 for 6+ workers with semaphore(25) each | Performance | HIGH | +| 13 | No route-level code splitting (entire app in single bundle) | Performance | HIGH | +| 14 | Zustand store triggers broad re-renders via Map replacement | Performance | HIGH | +| 15 | Excessive query invalidation per WebSocket message (thundering herd) | Performance | HIGH | +| 16 | `window.confirm()` for destructive operations in 12+ locations | QX | HIGH | +| 17 | Only one error boundary for entire application | QX | HIGH | +| 18 | Frontend test coverage: 7 files for 716 source files (~1%) | Testing | CRITICAL | +| 19 | All 8 page directories, all 16 hooks, entire 146-file UI layer: ZERO tests | Testing | CRITICAL | +| 20 | 148 `Sleep` calls in E2E tests causing flakiness | Testing | HIGH | + +### Domain-Specific Scores + +| Domain | Score | Grade | Key Issue | +|--------|-------|-------|-----------| +| Code Quality (Go) | 86.0 MI avg | B+ | Integration layer complexity | +| Code Quality (TS) | 94.1 MI avg | A- | 2-3 "mega-files" drag it down | +| Security | 85/100 | B+ | 2 CRITICAL + 6 HIGH findings | +| Performance | 40.5 weighted | D | 4 CRITICAL + 10 HIGH findings | +| QX | 71/100 | C+ | Responsive design (55) weakest | +| Testing | ~1% frontend | F | Backend good (40%), frontend desert | +| Product (SFDIPOT) | MEDIUM-HIGH risk | C | 
8 P0 + 19 P1 risks identified | + +--- + +## Finding Totals Across All Reports + +| Severity | Complexity | Security | Performance | QX | Testing | SFDIPOT | **Total** | +|----------|-----------|----------|-------------|-----|---------|---------|-----------| +| CRITICAL | 2 | 2 | 4 | 5 | 4 | 8 | **25** | +| HIGH | 3 | 6 | 10 | 5 | 5 | 19 | **48** | +| MEDIUM | 5 | 5 | 8 | 8 | 3 | 24 | **53** | +| LOW | 2 | 4 | 1 | 3 | 2 | 12 | **24** | +| **Total** | **12** | **17** | **23** | **21** | **14** | **63** | **150** | + +--- + +## Strengths Identified + +1. **Solid security architecture** — AES-256-GCM encryption, bcrypt at cost 12, Casbin RBAC, comprehensive SSRF protection, parameterized SQL throughout, no XSS vectors +2. **Well-structured Go backend** — Clean package boundaries, registry pattern, 42/43 integrations tested +3. **Backend test coverage** — 493 Go test files (40% file coverage) +4. **Clean defect prediction** — No backend files exceeded defect probability threshold +5. **Real-time collaboration** — WebSocket integration with auto-reconnection, per-node message queuing +6. **Good form validation** — `useRealtimeValidation` hook with debounced, real-time feedback +7. **Well-designed scoped tokens** — Proper audience, issuer, scope validation +8. 
**Execution engine correctness** — `SELECT FOR UPDATE SKIP LOCKED` prevents double-execution + +--- + +## Top 10 Priority Actions + +| # | Action | Impact | Effort | Reports | +|---|--------|--------|--------|---------| +| 1 | Fix WebSocket Hub deadlock (collect unregister clients after RUnlock) | System stability | 30 min | Performance, SFDIPOT | +| 2 | Fix WebSocket origin validation (check against BASE_URL) | Security | 30 min | Security, SFDIPOT | +| 3 | Add LIMIT to 3 unbounded polling queries | OOM prevention | 15 min | Performance, SFDIPOT | +| 4 | Guard dev auth routes + NoOpEncryptor in production | Security | 1 hour | Security | +| 5 | Add password complexity + login rate limiting | Auth security | 2 hours | Security | +| 6 | Add security headers middleware (CSP, HSTS) | Security | 1 hour | Security | +| 7 | Add route-level code splitting with React.lazy() | Frontend perf | 2 hours | Performance | +| 8 | Increase DB pool size default + add ConnMaxLifetime | DB reliability | 10 min | Performance | +| 9 | Debounce WebSocket query invalidation | Server load | 1 hour | Performance | +| 10 | Replace `window.confirm()` with AlertDialog (12+ locations) | UX consistency | 3 hours | QX | + +--- + +## Reports Index + +| # | Report | File | Findings | +|---|--------|------|----------| +| 1 | Code Quality & Complexity | [01-code-quality-complexity.md](01-code-quality-complexity.md) | 12 findings, 7 refactoring recommendations | +| 2 | Security Analysis | [02-security-analysis.md](02-security-analysis.md) | 17 findings (2 CRITICAL, 6 HIGH) | +| 3 | Performance Analysis | [03-performance-analysis.md](03-performance-analysis.md) | 23 findings (4 CRITICAL, 10 HIGH) | +| 4 | Quality Experience (QX) | [04-qx-analysis.md](04-qx-analysis.md) | 21 findings across 7 dimensions | +| 5 | SFDIPOT Product Factors | [05-sfdipot-product-factors.md](05-sfdipot-product-factors.md) | 63 test ideas, 14 exploratory sessions | +| 6 | Test Suite & Coverage | 
[06-test-coverage-analysis.md](06-test-coverage-analysis.md) | 14 findings, 16 recommendations | +| 7 | MCP Fleet Raw Results | [07-mcp-fleet-results.md](07-mcp-fleet-results.md) | Fleet data, coverage analysis, SAST results | + +--- + +## Methodology + +**QE Queen Swarm Coordination:** +- Fleet `fleet-bf88d6ec` initialized with hierarchical topology, 15 max agents +- 8 enabled domains: test-generation, test-execution, coverage-analysis, quality-assessment, defect-intelligence, security-compliance, requirements-validation, code-analysis +- 6 specialized agents ran in parallel (~5-8 min each): + - `qe-code-complexity` — 48 tool operations, analyzed 6,931 functions + - `qe-security-reviewer` — 113 tool operations, reviewed OWASP Top 10 + - `qe-performance-reviewer` — 69 tool operations, found 23 performance issues + - `qe-qx-partner` — 75 tool operations, scored 7 QX dimensions + - `qe-product-factors-assessor` — 93 tool operations, full SFDIPOT analysis + - `qe-test-architect` — 85 tool operations, inventoried 534 test files +- MCP fleet provided: quality gate evaluation, SAST scanning, coverage analysis, defect prediction, code indexing + +**Total analysis operations:** 483 tool calls across 6 agents + 9 MCP fleet operations + +--- +*Generated by AQE v3 QE Queen Swarm — March 28, 2026* diff --git a/docs/qe-reports-March-28/01-code-quality-complexity.md b/docs/qe-reports-March-28/01-code-quality-complexity.md new file mode 100644 index 0000000000..3862d1ea45 --- /dev/null +++ b/docs/qe-reports-March-28/01-code-quality-complexity.md @@ -0,0 +1,156 @@ +# SuperPlane Code Quality & Complexity Analysis Report + +**Date**: 2026-03-28 +**Scope**: Full-stack — Go backend (944 files, 407,022 lines) + TypeScript frontend (720 files, 137,910 lines) +**Total Functions Analyzed**: 6,931 + +--- + +## Executive Summary + +The SuperPlane codebase has an average maintainability index of 86.0 (Go) and 94.1 (TypeScript) — both in "Good" range. 
However, **severe hotspots** exist that represent outsized risk: + +- **382 functions** exceed cyclomatic complexity 15 (threshold) +- **209 functions** exceed cyclomatic complexity 20 +- **21 files** are in critical maintainability range (MI < 20) +- The **#1 risk**: `workflowv2/index.tsx` — a 6,589-line god component with 179 hooks and cyclomatic complexity exceeding 1,100 + +--- + +## Top 20 Most Complex Functions + +| Rank | Cyc | Lines | Function | File | Lang | +|------|-----|-------|----------|------|------| +| 1 | 284 | 901 | `safeName` | `workflowv2/index.tsx:4945` | TS | +| 2 | 218 | 1090 | `pushIndexed` (inner) | `workflowv2/index.tsx:553` | TS | +| 3 | 171 | 745 | `getIncomingNodes` (inner) | `workflowv2/index.tsx:2342` | TS | +| 4 | 158 | 900 | `ComponentSidebar` | `componentSidebar/index.tsx:181` | TS | +| 5 | 115 | 542 | `tokenize` | `lib/exprEvaluator.ts:54` | TS | +| 6 | 108 | 610 | `handleBeforeUnload` (inner) | `workflowv2/index.tsx:1643` | TS | +| 7 | 101 | 715 | `componentType` (inner) | `workflowv2/index.tsx:4195` | TS | +| 8 | 81 | 575 | `CanvasContent` | `CanvasPage/index.tsx:1877` | TS | +| 9 | 81 | 417 | `handleKeyDown` | `AutoCompleteInput.tsx:1048` | TS | +| 10 | 81 | 370 | `existingNodeNames` (inner) | `workflowv2/index.tsx:3336` | TS | +| 11 | 80 | 258 | `renderField` | `configurationFieldRenderer/index.tsx:269` | TS | +| 12 | 79 | 526 | `getApprovalStatusColor` | `chainItem/ChainItem.tsx:315` | TS | +| 13 | 61 | 263 | `Sentry.ListResources` | `integrations/sentry/sentry.go:555` | Go | +| 14 | 58 | 236 | `UpdateApp.Execute` | `integrations/digitalocean/update_app.go:224` | Go | +| 15 | 47 | 179 | `Hetzner.ListResources` | `integrations/hetzner/hetzner.go:98` | Go | +| 16 | 37 | 90 | `GCP.ListResources` | `integrations/gcp/gcp.go:880` | Go | +| 17 | 36 | 102 | `validateFieldValue` | `configuration/validation.go:623` | Go | +| 18 | 36 | 131 | `ParseCanvas` | `canvases/serialization.go:197` | Go | +| 19 | 34 | 119 | 
`OnIncidentTimelineEvent` | `integrations/rootly/on_incident_timeline_event.go:222` | Go | +| 20 | 32 | 159 | `Server.setupOwner` | `public/setup_owner.go:41` | Go | + +**Key observation**: Top 12 most complex functions are all TypeScript. Go complexity concentrates in integration `ListResources` and `Execute` methods. + +--- + +## Top 10 Code Smells + +### 1. God Component — `WorkflowPageV2` (CRITICAL) +**File**: `web_src/src/pages/workflowv2/index.tsx` (6,589 lines) +- 179 hooks (30 useState, 20 useEffect, 85 useCallback, 38 useMemo, 6 useRef) +- 48 toast notifications, 43 loading states +- 238 commits since January 2025 (highest churn of any file) +- Responsible for canvas editing, version control, change requests, node management, sidebar, websocket, drag-and-drop, YAML import/export, AI operations, approvals — all in one function + +### 2. God Object — `*Client` with 598 Methods (HIGH) +Aggregated across `pkg/integrations/*/client.go` files. Single `Client` struct per integration with massive surface area (e.g., `digitalocean/client.go` at 2,333 lines). + +### 3. Data Clumps / Long Parameter Lists (MEDIUM-HIGH) +48 Go functions accept 5+ parameters, 9 accept 8-9 parameters (e.g., `CreateIntegration` takes 9). Repeated `(ctx, registry, encryptor, authService, ...)` tuples indicate missing aggregate types. + +### 4. Excessive Proto Conversion Boilerplate (HIGH) +`pkg/grpc/actions/common.go` (1,177 lines, 52 functions): 84 TypeOptions references with mirrored `toProto`/`protoTo*` pairs. Textbook mechanical duplication. + +### 5. Monster Configuration Methods (HIGH) +`pkg/integrations/gcp/compute/create_vm.go:1376` (900 lines), `pkg/integrations/aws/ecs/service.go:247` (1,160 lines). Data files masquerading as code — 415 `configuration.Field{}` declarations total. + +### 6. Feature Envy — `CanvasPage` + Sidebar (MEDIUM-HIGH) +`web_src/src/ui/CanvasPage/index.tsx` (3,234 lines): Multiple components in one file, deeply intertwined prop passing, 12 `any` type usages. 
+ +### 7. Missing Default in Switch Statements (MEDIUM) +68 Go `switch` statements lack `default` clauses, including type-dispatch code in validation and proto conversion. Unknown types silently fall through. + +### 8. Duplicated ListResources Pattern (MEDIUM) +106 structurally similar `ListResources` implementations averaging 100-260 lines each, differentiated only by client calls and field mappings. + +### 9. Unconstrained Goroutine Spawning (MEDIUM) +Some workers (cleanup workers) spawn goroutines without concurrency limits, while others properly use semaphores. + +### 10. TypeScript `any` Proliferation (MEDIUM) +`custom-component/index.tsx` (21 usages), `CanvasPage/index.tsx` (12), `CustomComponentBuilderPage/index.tsx` (17) — type safety eroded in the most complex files. + +--- + +## Top 10 Largest/Most Complex Files + +| Rank | Lines | MI | Churn | File | Key Issues | +|------|-------|-----|-------|------|------------| +| 1 | 6,589 | 0.0 | 238 | `workflowv2/index.tsx` | God component, 179 hooks | +| 2 | 3,234 | 0.0 | 163 | `CanvasPage/index.tsx` | Multi-component, 338 complexity | +| 3 | 2,368 | 0.0 | — | `gcp/compute/create_vm.go` | 900-line Configuration | +| 4 | 2,333 | 0.0 | — | `digitalocean/client.go` | 30+ method god client | +| 5 | 1,568 | 0.0 | — | `AutoCompleteInput.tsx` | 272 complexity | +| 6 | 1,485 | 0.0 | — | `gcp/gcp.go` | 248 complexity | +| 7 | 1,434 | 0.0 | — | `AutoCompleteInput/core.ts` | Expression tokenizer | +| 8 | 1,406 | — | — | `aws/ecs/service.go` | 1,160-line function | +| 9 | 1,359 | 11.4 | 51 | `custom-component/index.tsx` | 178 complexity, 21 `any` | +| 10 | 1,325 | 4.6 | — | `gcp/compute/list_resource_handler.go` | Resource caching, 200 complexity | + +--- + +## Churn-Complexity Hotspots (Highest Defect Risk) + +| File | Commits | Complexity | Risk | +|------|---------|------------|------| +| `workflowv2/index.tsx` | 238 | 1,121 | **CRITICAL** | +| `CanvasPage/index.tsx` | 163 | 338 | **HIGH** | +| 
`BuildingBlocksSidebar/index.tsx` | 104 | 127 | MEDIUM | +| `server.go` + `public/server.go` | 102+97 | 122 | MEDIUM | +| `componentSidebar/index.tsx` | 79 | 112 | MEDIUM | + +--- + +## Maintainability Index Summary + +**Go Backend**: Average MI **86.0** (Good) +- 63% of files Good (MI >= 80) +- 13 files Critical (MI < 20) — concentrated in integrations and proto conversion + +**TypeScript Frontend**: Average MI **94.1** (Good) +- 80% of files Good (MI >= 80) +- 8 files Critical (MI < 20) — concentrated in workflow editor and canvas page + +--- + +## Refactoring Recommendations (Prioritized) + +### Priority 1 (Critical): Decompose `WorkflowPageV2` +Extract into 8-12 focused sub-components and custom hooks. Estimated reduction: 6,589 -> 800 lines for parent orchestrator. 10x+ testability improvement. + +### Priority 2 (High): Move Configuration Declarations to Data Files +Move 900-1,160 line `Configuration()` methods to YAML/JSON. Eliminates ~50,000 lines of nested struct literals. + +### Priority 3 (High): Generate Proto Conversion Code +Code-generate `toProto`/`protoTo*` pairs. Eliminates ~800 lines of mechanical duplication. + +### Priority 4 (High): Decompose `CanvasPage` +Extract 3 components into separate files with explicit prop interfaces. + +### Priority 5 (Medium): Introduce Parameter Objects in gRPC Actions +Create `ActionContext` aggregates. Reduce parameter counts from 8-9 -> 2-3. + +### Priority 6 (Medium): Standardize Worker Concurrency Limits +Add `semaphore.Weighted` to all workers, including cleanup workers. + +### Priority 7 (Medium): Add Default Clauses to Switch Statements +Add to all 68 missing `default` clauses, especially type-dispatch code. + +--- + +**Bottom Line**: 80%+ of the codebase is well-maintained. The problems are concentrated in (1) the frontend workflow editor — a 6,589-line monolith demanding immediate decomposition, and (2) integration Configuration methods embedding data as code. 
Addressing these two areas would dramatically improve the overall quality profile. + +--- +*Generated by AQE v3 Code Complexity Agent* diff --git a/docs/qe-reports-March-28/02-security-analysis.md b/docs/qe-reports-March-28/02-security-analysis.md new file mode 100644 index 0000000000..8ec9a6d663 --- /dev/null +++ b/docs/qe-reports-March-28/02-security-analysis.md @@ -0,0 +1,200 @@ +# SuperPlane Security Analysis Report + +**Date**: 2026-03-28 +**Scope**: Full-stack — Go backend (pkg/) + React frontend (web_src/src/) +**Classification**: CONFIDENTIAL — Internal Use Only +**Findings**: 22 total | 2 CRITICAL, 6 HIGH, 5 MEDIUM, 4 LOW, 5 INFORMATIONAL +**Weighted Score**: 11.75 + +--- + +## Executive Summary + +The SuperPlane codebase demonstrates a **generally solid security posture** with proper use of RBAC (Casbin), parameterized SQL queries (GORM), AES-256-GCM encryption, bcrypt password hashing, and comprehensive SSRF protection. The initial SAST report of 174 "hardcoded credentials" is overwhelmingly **false positives** (172 of 174 confirmed) — these are struct field names and configuration references, not actual embedded secrets. + +However, there are **two critical and six high-severity findings** that require immediate attention. + +--- + +## CRITICAL Findings + +### 1. WebSocket Origin Check Disabled (CRITICAL) + +**File:** `pkg/public/server.go:134-139` +**CWE:** CWE-346 (Origin Validation Error) | **OWASP:** A01 + +```go +upgrader: &websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { + // TODO: implement origin checking + return true + }, +} +``` + +The WebSocket upgrader accepts connections from ANY origin. An attacker can create a malicious page that connects to the SuperPlane WebSocket API using the victim's cookies, receiving real-time event data (Cross-Site WebSocket Hijacking). + +**Remediation:** Validate the `Origin` header against the configured `BASE_URL`. + +### 2. 
Development Auth Bypass Routes (CRITICAL) + +**File:** `pkg/authentication/authentication.go:120-125, 153-197` +**CWE:** CWE-287 (Improper Authentication) | **OWASP:** A05 + +When `APP_ENV == "development"`, authentication is completely bypassed with a hardcoded mock user (`dev-user-123`). If `APP_ENV` is accidentally set to `development` in production, any request to `/auth/{provider}` auto-authenticates. + +**Remediation:** Add defense-in-depth: bind address checks, prominent startup logging, startup panic if dev + production-looking config. + +--- + +## HIGH Findings + +### 3. NoOpEncryptor Available in Production + +**File:** `pkg/server/server.go:399-401` | **CWE:** CWE-311 + +`NO_ENCRYPTION=yes` disables ALL encryption for secrets, credentials, and tokens. No guard prevents this in production. + +**Remediation:** Disallow unless `APP_ENV` is `development` or `test`. + +### 4. No Password Complexity Validation + +**File:** `pkg/authentication/authentication.go:426-430` | **CWE:** CWE-521 + +Signup only validates non-empty password. No minimum length, complexity, or breached password checks. + +**Remediation:** Enforce minimum 8 characters. + +### 5. No Rate Limiting on Password Login + +**File:** `pkg/authentication/authentication.go:337-377` | **CWE:** CWE-307 + +No rate limiting, account lockout, or brute force protection on password login (unlike magic code auth which has `magicCodeRateLimit = 5`). + +**Remediation:** Add per-IP and per-account rate limiting. + +### 6. Cookie Secure Flag Depends on TLS Termination + +**File:** `pkg/authentication/authentication.go:294` | **CWE:** CWE-614 + +`Secure: r.TLS != nil` — if TLS is terminated at a reverse proxy (common production pattern), cookies are sent over HTTP. + +**Remediation:** Check `X-Forwarded-Proto` header or add config flag. + +### 7. No Security Headers (CSP, HSTS, etc.) 
+ +**CWE:** CWE-693 + +No `Content-Security-Policy`, `Strict-Transport-Security`, `X-Frame-Options`, or `X-Content-Type-Options` headers anywhere. + +**Remediation:** Add security headers middleware. + +### 8. SSH Host Key Verification Disabled + +**File:** `pkg/components/ssh/client.go:88` | **CWE:** CWE-295 + +`ssh.InsecureIgnoreHostKey()` — vulnerable to MITM attacks between SuperPlane and SSH targets. + +**Remediation:** Add optional host key verification. At minimum, log fingerprints. + +--- + +## MEDIUM Findings + +### 9. JWT Uses HS256 Symmetric Signing + +**File:** `pkg/jwt/jwt.go:41` | Any service that can verify tokens can forge them. + +### 10. Encryption Key Without KDF + +**File:** `pkg/server/server.go:391-403` | Raw env var used as AES key, no HKDF applied. + +### 11. gRPC Reflection Enabled Unconditionally + +**File:** `pkg/grpc/server.go:158` | Aids attacker reconnaissance. + +### 12. No Rate Limiting on gRPC API + +**File:** `pkg/grpc/server.go:83-92` | No throttling interceptor. + +### 13. No CSRF Protection on Auth Endpoints + +`SameSite: Lax` mitigates most cases but not same-site attacks or older browsers. + +--- + +## LOW Findings + +### 14. Full Metadata Logged Including Potential Tokens + +**File:** `pkg/authorization/interceptor.go:357,363` + +### 15. SSH Private Key Preview in Error Messages + +**File:** `pkg/components/ssh/client.go:124-131` — First 50 chars of private key in errors. + +### 16. Server-Side Template with Env Vars + +**File:** `pkg/web/index_template.go:24-37` — Low risk currently (only env vars injected). + +### 17. Open Redirect Edge Cases + +**File:** `pkg/authentication/authentication.go:993-1023` — Consider blocking backslash and embedded credentials. 
+ +--- + +## SAST False Positive Analysis + +**172 of 174 initial SAST findings are confirmed FALSE POSITIVES.** + +| Location | Nature | Verdict | +|----------|--------|---------| +| `pkg/authentication/` | Struct field `Password string` in `ProviderConfig` | Field names, not values | +| `pkg/cli/commands/secrets/` | CLI commands for CRUD on secrets entities | Management code, not secrets | +| `pkg/components/ssh/` | `AuthSpec` struct with fields referencing `SecretKeyRef` | References to vault, not hardcoded | +| `pkg/configuration/` | `SecretKeyRef` struct, `FieldTypeSecretKey` constant | Schema definitions | +| `pkg/impersonation/` | `CookieName = "impersonation_token"` | Cookie name constant | +| `pkg/integrations/aws/` | `AccessKeyID`, `SecretAccessKey` in STS response struct | AWS API field names | + +--- + +## Positive Security Practices + +1. **Comprehensive SSRF protection** — DNS rebinding defense, cloud metadata blocking, private IP filtering +2. **AES-256-GCM encryption** with random nonces for all secrets at rest +3. **Bcrypt password hashing** at cost 12 +4. **RBAC with Casbin** covering all 50+ gRPC endpoints +5. **Error sanitization** preventing internal/database error leakage +6. **Parameterized SQL** throughout — no injection vectors +7. **HttpOnly + SameSite:Lax cookies** for all auth tokens +8. **Scoped token design** with audience, issuer, and expiry validation +9. **Input validation** on SSH component (env var names, port ranges) +10. **Impersonation audit logging** with admin ID, target ID, client IP +11. **Open redirect protection** with path-prefix validation +12. **No XSS vectors** in frontend — no `dangerouslySetInnerHTML`, no `eval()` + +--- + +## Remediation Priority + +### Immediate (Before Next Release) +1. Fix WebSocket origin validation — replace `return true` with `BASE_URL` check +2. Guard dev auth routes — defense-in-depth beyond `isDev` +3. 
Guard NoOpEncryptor — reject unless in dev/test env + +### Short-Term (Within Sprint) +4. Add password complexity — minimum 8 chars +5. Add password login rate limiting +6. Fix cookie Secure flag — check `X-Forwarded-Proto` +7. Add security headers middleware + +### Medium-Term (Next Quarter) +8. Migrate JWT to asymmetric signing (RS256/ES256) +9. Apply HKDF to encryption key +10. Gate gRPC reflection behind dev-only flag +11. Add CSRF tokens +12. Add gRPC rate limiting +13. Add optional SSH host key verification + +--- +*Generated by AQE v3 Security Reviewer Agent* diff --git a/docs/qe-reports-March-28/03-performance-analysis.md b/docs/qe-reports-March-28/03-performance-analysis.md new file mode 100644 index 0000000000..2cb5addd01 --- /dev/null +++ b/docs/qe-reports-March-28/03-performance-analysis.md @@ -0,0 +1,262 @@ +# SuperPlane Performance Analysis Report + +**Date**: 2026-03-28 +**Scope**: Full-stack — Go backend (pkg/) + React frontend (web_src/src/) +**Findings**: 23 total | 4 CRITICAL, 10 HIGH, 8 MEDIUM, 1 LOW +**Weighted Score**: 40.50 + +--- + +## Executive Summary + +This report covers a comprehensive analysis of the SuperPlane project's Go backend and React frontend. The most critical issues are **unbounded database queries in polling workers**, **a WebSocket Hub deadlock**, a **6,589-line monolithic React component**, and **lack of code splitting at the route level**. + +--- + +## SECTION 1: DATABASE PERFORMANCE + +### FINDING 1 (CRITICAL): Unbounded `ListPendingNodeExecutions()` Query + +**File:** `pkg/models/canvas_node_execution.go:132-144` + +Fetches **all** pending executions without any `LIMIT` clause. Polled every **60 seconds** by `NodeExecutor.Start()` at `pkg/workers/node_executor.go:64-74`. In a production backlog scenario with thousands of pending executions, this loads every row into memory on every tick. + +**Impact:** Multi-second query times and GiB-level memory spikes under load. +**Recommendation:** Add a `LIMIT` clause (e.g., 500). 
The semaphore already limits concurrency to 25. + +### FINDING 2 (CRITICAL): Unbounded `ListPendingCanvasEvents()` Query + +**File:** `pkg/models/canvas_event.go:206-220` + +Same pattern — fetches all pending events with no limit. Polled every **60 seconds** by `EventRouter.Start()`. + +**Impact:** Same as Finding 1. Event storms can fetch thousands of rows unbounded. +**Recommendation:** Add `LIMIT` matching the semaphore capacity (e.g., 100 or 500). + +### FINDING 3 (CRITICAL): Unbounded `ListNodeRequests()` Query (1s Poll) + +**File:** `pkg/models/canvas_node_request.go:62-81` + +Same unbounded fetch pattern, polled every **1 second** by `NodeRequestWorker.Start()`. + +**Impact:** Polling every 1 second without a limit is especially dangerous — constant full-table-scan pressure under burst conditions. +**Recommendation:** Add LIMIT. Consider increasing poll interval to 5s or switching to event-driven consumption. + +### FINDING 4 (HIGH): N+1 Query Pattern in `SerializeCanvas()` + +**File:** `pkg/grpc/actions/canvases/serialization.go:21-139` + +Issues **5+ sequential database queries** per call. Called for **every canvas** in `ListCanvases()` at `list_canvases.go:21-28`. For 50 canvases = 100-150 queries per list call. + +**Impact:** Response time degrades linearly with canvas count. +**Recommendation:** Batch-load related data for all canvases at once before serialization. + +### FINDING 5 (HIGH): O(n*m) String Comparison in Serialization + +**File:** `pkg/grpc/actions/canvases/list_node_executions.go:414-431` + +UUID-to-string comparison in nested loops: `event.ID.String() == execution.EventID.String()`. For 100 executions with 100 events = 10,000 string allocations. + +**Impact:** GC pressure hotspot at scale. +**Recommendation:** Build `map[uuid.UUID]CanvasEvent` index. Compare UUIDs as bytes. 
+ +### FINDING 6 (HIGH): Missing `ConnMaxLifetime` in Database Pool + +**File:** `pkg/database/connection.go:78-86` + +Without `ConnMaxLifetime`, connections live indefinitely. Cloud environments with connection proxies have idle timeouts, causing intermittent "connection reset" errors. + +**Recommendation:** Add `sqlDB.SetConnMaxLifetime(30 * time.Minute)`. + +### FINDING 7 (HIGH): Default Pool Size of 5 Is Too Low + +**File:** `pkg/database/connection.go:35-44` + +6+ concurrent workers (each with semaphore concurrency of 25) share a single 5-connection pool. + +**Impact:** Workers will contend for connections, leading to serialized execution. +**Recommendation:** Default should be 20-30. + +### FINDING 8 (MEDIUM): Missing Composite Index on Queue Items + +Query `WHERE workflow_id = ? AND node_id = ? ORDER BY created_at ASC` has no composite index. + +**Recommendation:** `CREATE INDEX idx_wf_node_queue_items_node_created ON workflow_node_queue_items (workflow_id, node_id, created_at ASC);` + +### FINDING 9 (MEDIUM): Missing Index for Ready Nodes Query + +`ListCanvasNodesReady()` filters on `(state, type)` — no composite index exists. + +**Recommendation:** `CREATE INDEX idx_workflow_nodes_state_type ON workflow_nodes (state, type) WHERE deleted_at IS NULL;` + +--- + +## SECTION 2: CONCURRENCY AND MEMORY MANAGEMENT + +### FINDING 10 (HIGH): `context.Background()` in Semaphore Acquisition + +**Files:** `pkg/workers/node_queue_worker.go:81`, `pkg/workers/node_executor.go:82`, `pkg/workers/event_router.go:64` + +Workers use `context.Background()` for semaphore acquisition. During shutdown, goroutines hang indefinitely. + +**Recommendation:** Pass parent `ctx` to `semaphore.Acquire()`. + +### FINDING 11 (HIGH): WebSocket Hub Deadlock + +**File:** `pkg/public/ws/ws_hub.go:127-142` + +`BroadcastToWorkflow` holds a **read lock** and calls `unregisterClient()` which requires a **write lock**. 
When a client's 4096-message buffer fills up, the entire Hub permanently deadlocks.
+
+**Impact:** Full system deadlock killing all WebSocket connections.
+**Recommendation:** Collect clients to unregister and handle after releasing the read lock.
+
+### FINDING 12 (MEDIUM): Non-Thread-Safe DB Singleton Init
+
+**File:** `pkg/database/connection.go:26-33`
+
+Lazy initialization without mutex or `sync.Once`. Multiple goroutines calling `Conn()` simultaneously during startup can create multiple pools.
+
+**Recommendation:** Use `sync.Once`.
+
+---
+
+## SECTION 3: API PERFORMANCE
+
+### FINDING 13 (HIGH): Sequential DB Calls in DescribeCanvas
+
+**File:** `pkg/grpc/actions/canvases/serialization.go:71-107`
+
+4 independent database queries executed sequentially when `includeStatus = true`. Could be parallelized.
+
+**Impact:** Response time = sum of all 4 queries instead of max. 50-200ms wasted.
+**Recommendation:** Wrap in goroutines with `sync.WaitGroup`.
+
+### FINDING 14 (MEDIUM): Per-Canvas DB Calls in ListCanvases
+
+**File:** `pkg/grpc/actions/canvases/list_canvases.go:14-34`
+
+Even with `includeStatus=false`, each canvas makes 2-3 DB queries.
+
+**Recommendation:** Batch-load all versions and nodes in single queries.
+
+---
+
+## SECTION 4: FRONTEND RENDERING PERFORMANCE
+
+### FINDING 15 (CRITICAL): 6,589-Line Monolithic Component
+
+**File:** `web_src/src/pages/workflowv2/index.tsx`
+
+124 `useMemo`/`useCallback`, 21 `useEffect` calls, 100+ imports. Any state change forces React to re-evaluate all memoization hooks.
+
+**Impact:** Every WebSocket message triggers re-evaluation of the entire component tree.
+**Recommendation:** Extract into focused sub-components (e.g. a canvas view, node sidebar, and execution panel) coordinated by a thin parent orchestrator.
+
+### FINDING 16 (HIGH): No Route-Level Code Splitting
+
+**File:** `web_src/src/App.tsx:1-142`
+
+All pages imported eagerly. No `React.lazy()` used anywhere. Entire application bundled into a single chunk.
+
+**Impact:** Initial page load downloads everything regardless of which page the user visits.
+**Recommendation:** Use `React.lazy()` for route-level components with Vite code splitting. + +### FINDING 17 (HIGH): Zustand Store Triggers Broad Re-Renders + +**File:** `web_src/src/stores/nodeExecutionStore.ts:336-352` + +Every update creates a new `Map` and increments a global `version` counter. Any component subscribing re-renders on every update regardless of which node changed. + +**Impact:** N nodes on a canvas = N unnecessary re-renders per single node update. +**Recommendation:** Use Zustand selectors with `shallow` equality. Remove global `version` counter. + +### FINDING 18 (MEDIUM): `staleTime: 0` on Canvas Query + +**File:** `web_src/src/hooks/useCanvasData.ts:152` + +Every mount/focus triggers refetch despite WebSocket handling real-time updates. + +**Recommendation:** Set `staleTime` to 10-30 seconds. + +### FINDING 19 (MEDIUM): 3-Second Polling for Canvas Memory + +**File:** `web_src/src/hooks/useCanvasData.ts:774` + +`refetchInterval: 3000` polls regardless of whether memory view is active. + +**Recommendation:** Only enable polling when the memory panel is visible. + +--- + +## SECTION 5: WEBSOCKET PERFORMANCE + +### FINDING 20 (HIGH): Excessive Query Invalidation per WebSocket Message + +**File:** `web_src/src/hooks/useCanvasWebsocket.ts:59-120` + +Every WebSocket event invalidates the infinite events list. A single execution flow generates ~20+ events = 20+ full refetches. + +**Impact:** Thundering herd on the server and UI jank. +**Recommendation:** Debounce invalidation (500ms) or delay until execution chain completes. + +### FINDING 21 (MEDIUM): 4096-Message WebSocket Client Buffer + +**File:** `pkg/public/ws/ws_hub.go:149` + +With JSON payloads of 1-5KB, this could consume 4-20MB per client. + +**Impact:** 100 users = 400MB-2GB for WebSocket buffers alone. +**Recommendation:** Reduce to 256-512 with message coalescing. 
+ +--- + +## SECTION 6: BUILD AND CONFIGURATION + +### FINDING 22 (MEDIUM): Missing Manual Chunk Strategy in Vite + +**File:** `web_src/vite.config.ts:72-91` + +`rollupOptions` are commented out. Large dependencies (monaco-editor, react-flow, lodash) all in main bundle. + +**Recommendation:** Enable `manualChunks` for vendor splitting. + +### FINDING 23 (LOW): `FailInTransaction` Reads Outside Transaction + +**File:** `pkg/models/canvas_node_execution.go:515-523` + +Parent execution fetched outside transaction, then modified inside it. + +**Impact:** Potential lost updates under concurrent access. +**Recommendation:** Use `FindNodeExecutionInTransaction(tx, ...)`. + +--- + +## Summary Table + +| # | Severity | Category | Finding | Est. Impact | +|---|----------|----------|---------|-------------| +| 1 | CRITICAL | Database | Unbounded ListPendingNodeExecutions | OOM risk | +| 2 | CRITICAL | Database | Unbounded ListPendingCanvasEvents | OOM risk | +| 3 | CRITICAL | Database | Unbounded ListNodeRequests (1s poll) | CPU/memory | +| 11 | HIGH | Concurrency | WebSocket Hub deadlock | Full system deadlock | +| 4 | HIGH | Database | N+1 in ListCanvases | 100+ queries/list | +| 5 | HIGH | Database | O(n*m) string comparison | GC pressure | +| 6 | HIGH | Database | Missing ConnMaxLifetime | Stale connections | +| 7 | HIGH | Database | Default pool size of 5 | Contention | +| 10 | HIGH | Concurrency | context.Background() in semaphore | Shutdown hang | +| 13 | HIGH | API | Sequential DB calls in DescribeCanvas | 50-200ms wasted | +| 15 | CRITICAL | Frontend | 6,589-line monolithic component | Re-render storms | +| 16 | HIGH | Frontend | No route-level code splitting | Slow initial load | +| 17 | HIGH | Frontend | Zustand broad re-renders | Unnecessary renders | +| 20 | HIGH | WebSocket | Excessive query invalidation | Thundering herd | + +## Top 5 Priority Fixes + +1. **Fix WebSocket Hub deadlock** (Finding 11) — correctness bug, 30 min fix +2. 
**Add LIMIT to all polling queries** (Findings 1-3) — one-line changes, 15 min +3. **Increase DB pool size + add ConnMaxLifetime** (Findings 6-7) — config change, 10 min +4. **Debounce WebSocket query invalidation** (Finding 20) — 1 hour +5. **Add route-level code splitting** (Finding 16) — 2 hours + +--- +*Generated by AQE v3 Performance Reviewer Agent* diff --git a/docs/qe-reports-March-28/04-qx-analysis.md b/docs/qe-reports-March-28/04-qx-analysis.md new file mode 100644 index 0000000000..7de719c016 --- /dev/null +++ b/docs/qe-reports-March-28/04-qx-analysis.md @@ -0,0 +1,225 @@ +# SuperPlane Quality Experience (QX) Analysis Report + +**Project**: SuperPlane - Workflow Orchestration Platform +**Scope**: Frontend application at `/workspaces/superplane/web_src/src/` +**Date**: 2026-03-28 +**Analysis Type**: READ-ONLY static code analysis across 7 QX dimensions +**Overall QX Score**: 71/100 (C+) + +--- + +## 1. Error Handling UX + +### 1.1 Error Boundary Implementation -- Score: 72/100 + +**Strengths:** +- A top-level Sentry `ErrorBoundary` wraps the entire app in `main.tsx` (line 14), providing a global crash safety net with a user-friendly `` fallback that displays "Something went wrong" with "Try Again" and "Go Home" buttons. +- Sentry integration (`sentry.ts`) is well-configured with console capture, browser API error tracking, and global handlers for unhandled rejections. + +**Issues Found:** + +**CRITICAL: Only one error boundary exists for the entire application.** There is no route-level or component-level error boundary. If any individual page crashes (e.g., the workflow editor, settings pages), the entire application resets to the error page, causing the user to lose all context. This is particularly damaging for the workflow editor which contains complex state. 
+ +**MODERATE: The `NotFoundPage` component at `components/NotFoundPage.tsx` uses `window.location.href = "/"` (line 23) for navigation, causing a full page reload instead of using React Router's `useNavigate`.** This destroys all in-memory state and forces a complete re-authentication flow. + +**MODERATE: The catch-all route at `App.tsx` (line 116) silently redirects to `/` with `` instead of showing the 404 page.** Users who mistype a URL never receive feedback that their URL was wrong. + +### 1.2 API Error Display -- Score: 68/100 + +**Strengths:** +- A centralized toast utility (`utils/toast.ts`) provides standardized `showErrorToast`, `showSuccessToast`, `showInfoToast`, and `showWarningToast` functions using Sonner. +- Usage limit errors are handled with excellent specificity through `utils/usageLimits.ts`, which maps 8 distinct usage limit error types to user-friendly messages with actionable links. +- The API interceptor (`lib/api-interceptor.ts`) handles 401 responses with redirect preservation. + +**Issues Found:** + +**MODERATE: Many error handlers expose raw API error messages directly to users.** For example, in `pages/workflowv2/index.tsx` (line 2532): +```typescript +const errorMessage = error?.response?.data?.message || error?.message || "Failed to save changes to the canvas"; +``` +The fallback to `error?.message` may expose technical JavaScript error messages. + +**MODERATE: The `AccountContext` (`contexts/AccountContext.tsx`, line 65) silently swallows all errors during account fetch with an empty catch block `catch (_error) {}`.** Users experiencing network issues see no feedback. + +### 1.3 Form Validation Feedback -- Score: 82/100 + +**Strengths:** +- The `useRealtimeValidation` hook provides debounced, real-time validation with three error types: `required`, `validation_rule`, and `visibility`. +- Validation is performance-optimized with hash-based deduplication. 
+- `aria-invalid` styling is consistently used across input, textarea, button, and badge components. + +--- + +## 2. Loading States -- Score: 62/100 + +**Strengths:** +- A `LoadingButton` component provides consistent loading state for buttons with animated spinner. +- Auth-related pages display contextual loading messages: "Signing in...", "Creating...", "Verifying..." +- A `Skeleton` component exists for shimmer-style loading placeholders. + +**Issues Found:** + +**CRITICAL: Skeleton loaders are defined but barely used in the application.** Only 3 files reference the Skeleton component. Major pages use either plain "Loading..." text or a simple spinner, causing layout shifts. + +**MODERATE: No optimistic updates are implemented.** Despite using TanStack Query with `useMutation`, none of the mutation hooks use `onMutate` for optimistic updates. + +**MODERATE: The organization settings page has three sequential loading gates, each showing a minimal "Loading..." message, creating a multi-step loading experience.** + +--- + +## 3. User Journey Quality + +### 3.1 Workflow Creation/Editing -- Score: 70/100 + +**Strengths:** +- Comprehensive canvas versioning, change request workflows, auto-layout, YAML import/export. +- Unsaved changes tracking with auto-save capability. +- Real-time collaboration through WebSocket updates. + +**Issues Found:** + +**CRITICAL: The workflow page file is 6,589 lines long.** Significant code smell indicating potential for complex, difficult-to-maintain user flows. + +**CRITICAL: No undo/redo support exists for canvas editing.** Users editing workflow canvases cannot undo node additions, deletions, or repositioning. + +**MODERATE: The revert function only restores to the initial state, not to arbitrary undo steps.** + +### 3.2 Navigation -- Score: 76/100 + +**Strengths:** +- Well-implemented Breadcrumbs component with proper ARIA. +- Organization-scoped routing provides clear URL structure. +- Dynamic page titles via `usePageTitle` hook. 
+ +### 3.3 Feedback for Destructive Actions -- Score: 60/100 + +**CRITICAL: The application uses `window.confirm()` for destructive operations in at least 12 locations:** +- Group deletion (`settings/Groups.tsx`, line 60) +- Service account deletion (`settings/ServiceAccounts.tsx`, line 102) +- Secret deletion (`settings/SecretDetail.tsx`, line 163) +- Role deletion (`settings/Roles.tsx`, line 50) +- Canvas version reset (`workflowv2/index.tsx`, line 4833) +- Queue item cancellation (`workflowv2/useOnCancelQueueItemHandler.ts`, line 26) +- Node deletion (`CanvasPage/index.tsx`, line 3169) + +An `AlertDialog` component already exists but is only used in Storybook examples. + +--- + +## 4. Accessibility (a11y) -- Score: 68/100 + +### 4.1 ARIA Attributes -- Score: 74/100 + +**Strengths:** +- Select component implements proper ARIA: `role="button"`, `aria-haspopup="listbox"`, `aria-expanded`. +- Switch component uses `role="switch"`, `aria-checked`, `aria-label`. +- Decorative icons consistently use `aria-hidden="true"`. +- `sr-only` class used across ~20 locations. + +**Issues Found:** + +**MODERATE: The custom Dialog component is missing ARIA attributes.** No `role="dialog"`, `aria-modal`, `aria-labelledby`, or Escape key handling. + +**MODERATE: Switch component uses `focus:outline-none` removing visible focus indicator entirely.** Should use `focus-visible:ring-2` for keyboard accessibility. + +### 4.2 Keyboard Navigation -- Score: 70/100 + +**MODERATE: Tab component does not implement WAI-ARIA Tab Pattern.** Missing `role="tablist"`, `role="tab"`, `aria-selected`. No arrow key navigation between tabs. + +### 4.3 Color and Visual Accessibility -- Score: 65/100 + +**MODERATE: Inconsistent dark mode color contrast.** `text-gray-500` appears frequently without dark mode overrides, potentially failing WCAG AA. + +--- + +## 5. Responsive Design -- Score: 55/100 + +**Strengths:** +- `useIsMobile` hook exists with 768px breakpoint. 
+- Home page grid uses responsive `grid-cols-1 md:grid-cols-2 lg:grid-cols-3`. + +**Issues Found:** + +**CRITICAL: No `@media` queries in `index.css`.** Responsive design relies entirely on Tailwind utilities — any component missing responsive classes won't adapt. + +**CRITICAL: The workflow canvas editor has no mobile/tablet adaptations.** No mobile fallback, viewport warning, or responsive adaptation. + +**MODERATE: `useIsMobile` hook imported in only 1 location** — the vast majority of the app doesn't consume mobile state. + +--- + +## 6. Feedback and Communication + +### 6.1 Toasts -- Score: 78/100 +- Sonner positioned at bottom-center, four toast levels used consistently. + +### 6.2 Progress Indicators -- Score: 52/100 +- No progress bars for long operations (YAML import, bulk actions). + +### 6.3 Real-time Updates -- Score: 85/100 +- WebSocket integration well-implemented with auto-reconnection, per-node message queuing, 7 event types, and automatic TanStack Query cache invalidation. +- **LOW:** No user-visible WebSocket connection status indicator. + +--- + +## 7. Consistency -- Score: 74/100 + +**CRITICAL: Two competing component systems exist side by side.** Hand-built components in `components/` (lacking accessibility) alongside shadcn/ui components in `components/ui/` and `ui/`. + +**MODERATE: Inconsistent error handling patterns.** Canvas deletion uses proper Dialog; group/role/secret deletion uses `window.confirm()`. + +**MODERATE: Mixed loading state patterns.** `animate-spin`, `Loading...`, `
<div>Loading user...</div>
`, `` all used for similar purposes. + +**LOW: Mixed terminology** — "Canvas", "Workflow", and "Bundle" used for related concepts. + +--- + +## Prioritized Recommendations + +### Priority 1 -- High Impact + +| # | Finding | Impact | Effort | +|---|---------|--------|--------| +| 1 | Replace `window.confirm()` with AlertDialog across 12+ locations | High | Medium | +| 2 | Add route-level Error Boundaries for workflow editor and settings | High | Low | +| 3 | Add ARIA attributes to custom Dialog (`role="dialog"`, `aria-modal`, Escape) | High | Low | +| 4 | Implement skeleton loading for home page and settings pages | High | Medium | + +### Priority 2 -- Moderate Impact + +| # | Finding | Impact | Effort | +|---|---------|--------|--------| +| 5 | Add undo/redo for canvas editing | High | High | +| 6 | Show 404 page instead of silent redirect for unknown routes | Medium | Low | +| 7 | Fix Switch `focus:outline-none` to `focus-visible:ring-2` | Medium | Low | +| 8 | Add mobile viewport warning for canvas editor | Medium | Low | +| 9 | Consolidate component systems — migrate to shadcn equivalents | Medium | High | +| 10 | Add WebSocket connection status indicator | Medium | Low | + +### Priority 3 -- Lower Impact + +| # | Finding | Impact | Effort | +|---|---------|--------|--------| +| 11 | Use `useNavigate` instead of `window.location.href` in error pages | Low | Low | +| 12 | Add optimistic updates for CRUD mutations | Low | Medium | +| 13 | Unify loading state components | Low | Medium | +| 14 | Align terminology: Canvas vs Workflow vs Bundle | Low | Low | +| 15 | Add error recovery in AccountContext | Low | Low | + +--- + +## Score Summary + +| Dimension | Score | Grade | +|-----------|-------|-------| +| Error Handling UX | 72/100 | C+ | +| Loading States | 62/100 | D+ | +| User Journey Quality | 69/100 | D+ | +| Accessibility | 68/100 | D+ | +| Responsive Design | 55/100 | F | +| Feedback & Communication | 73/100 | C | +| Consistency | 74/100 | C | +| **Overall 
QX Score** | **71/100** | **C+** | + +--- +*Generated by AQE v3 QX Partner Agent* diff --git a/docs/qe-reports-March-28/05-sfdipot-product-factors.md b/docs/qe-reports-March-28/05-sfdipot-product-factors.md new file mode 100644 index 0000000000..477f9c838c --- /dev/null +++ b/docs/qe-reports-March-28/05-sfdipot-product-factors.md @@ -0,0 +1,237 @@ +# SFDIPOT Product Factors Analysis: SuperPlane + +## Heuristic Test Strategy Model (HTSM) -- James Bach's Framework + +**Project**: SuperPlane -- Event-Driven Workflow Orchestration Platform +**Analysis Date**: 2026-03-28 +**Overall Risk Assessment**: MEDIUM-HIGH + +--- + +## Executive Summary + +SuperPlane is a substantial event-driven workflow orchestration platform with 36 Go packages, 43 third-party integrations, a React 19 frontend, and a Python AI agent service. The system's reliance on polling-based workers with semaphore-bounded concurrency and RabbitMQ message passing creates a complex failure surface. 63 test ideas generated across all 7 factors, 14 exploratory test session proposals, and 14 clarifying questions. + +| Priority | Count | Description | +|----------|-------|-------------| +| P0 (Critical) | 8 | Race conditions, security bypasses, infinite loops, data loss | +| P1 (High) | 19 | Auth gaps, missing timeouts, scalability limits | +| P2 (Medium) | 24 | Config risks, test coverage gaps, observability | +| P3 (Low) | 12 | Browser compat, documentation, non-critical UX | + +--- + +## 1. 
STRUCTURE -- What the Product IS + +### Strengths +- Clean registry pattern using Go `init()` for self-registering components/integrations +- Error sanitizer interceptor prevents internal error leakage +- Panic recovery middleware with Sentry integration +- Well-structured model layer with explicit table name mappings + +### Key Risks +- **P0: Global database singleton** — `database.Conn()` called directly from models, tightly coupling to single connection pool +- **P1: Domain/DB naming mismatch** — Code uses "Canvas" but tables are still "workflows" +- **P1: 214 database migrations** in ~12 months — high velocity increases conflict/corruption risk +- **P1: Dual JWT library versions** — `golang-jwt/jwt/v4` and `v5` both in `go.mod` +- **P1: RabbitMQ 3.8.17 is EOL** — known issues with ordering and cluster partitions +- **P1: `HEALTHCHECK NONE` in production Docker image** — no monitoring outside Kubernetes +- **P2: Hardcoded encryption key in docker-compose.dev.yml** — risk if leaked to production + +### Top Test Ideas +1. Build with `-race` flag and run full test suite to surface data races (P0) +2. Execute all 214 migrations up/down in sequence to confirm reversibility (P1) +3. Start application with RabbitMQ unavailable — does it panic, retry, or degrade? (P0) +4. Generate JWT with v4 claims, validate with v5 parsing to expose cross-version issues (P1) + +--- + +## 2. FUNCTION -- What the Product DOES + +### Core Capabilities +- Canvas (Workflow) management with versioning, change requests, conflict resolution +- Event-driven execution: Event -> EventRouter -> NodeQueueWorker -> NodeExecutor +- 15 components (approval, filter, if, merge, http, ssh, memory, etc.) +- 3 triggers (schedule, start, webhook) +- 43 integrations (AWS, GCP, GitHub, Slack, PagerDuty, etc.) 
+- Pessimistic locking via `SELECT FOR UPDATE SKIP LOCKED` + +### Key Risks +- **P0: `GenerateUniqueNodeID` infinite loop potential** — loops forever if all IDs exhausted +- **P0: Polling fallback with minute-level latency** — if RabbitMQ fails silently, workflows delayed +- **P1: Semaphore limits hardcoded to 25** — no dynamic scaling +- **P1: Approval component has no timeout** — workflows can hang indefinitely +- **P1: `panic(err)` on missing RabbitMQ URL** — crashes entire server +- **P1: EventRouter silently continues on error** — potential infinite retry of poison events + +### Top Test Ideas +5. Create canvas with 10,000 nodes, test `GenerateUniqueNodeID` collision boundary (P0) +6. Stop RabbitMQ with 50 queued events, restart, measure fallback processing time (P0) +7. Submit 100 concurrent events to same node — confirm exactly 100 executions, no duplicates (P0) +8. Create circular node references (A->B->C->A) — confirm linter catches it, engine doesn't loop (P0) +9. Inject malformed event payload, confirm EventRouter marks as failed vs infinite retry (P0) + +--- + +## 3. 
DATA -- What it PROCESSES + +### Core Data Flow +Event -> CanvasEvent (pending) -> EventRouter (routed) -> CanvasNodeExecution (pending -> started -> finished) -> Output Events -> Next Node + +### Strengths +- Execution chain tracking via `RootEventID`, `PreviousExecutionID`, `ParentExecutionID` +- Soft deletes with 30-day grace period +- Event retention worker with configurable per-org windows +- Configuration snapshots on execution creation + +### Key Risks +- **P0: 64KB event payload limit enforced only at API layer** — internal events may bypass +- **P1: Unbounded `ListPendingNodeExecutions`/`ListPendingCanvasEvents`** — OOM risk after outage +- **P1: Canvas memory has no size limit** — workflows can grow unbounded +- **P0: WebhookProvisioner 3-phase process** — crash between phases 2-3 creates duplicate webhooks +- **P1: No optimistic concurrency control on non-versioned canvas updates** + +### Top Test Ideas +10. Create event with exactly 64KB payload (success), then 64KB+1 (reject) (P0) +11. Insert 10,000 pending events, start EventRouter, measure memory consumption (P1) +12. Simulate crash after webhook provisioning but before DB update — check for duplicates (P0) +13. Two users update same non-versioned canvas simultaneously — confirm consistency (P1) + +--- + +## 4. INTERFACES -- How it CONNECTS + +### APIs +- gRPC Internal (port 50051): 13 services +- REST Public (port 8000): gRPC-gateway with Swagger UI +- WebSocket: Real-time canvas updates +- Webhook endpoints: 43 integration receivers + +### Key Risks +- **P1: WebSocket hub `Run()` has no exit condition** — prevents clean shutdown +- **P1: No rate limiting on inbound webhook endpoints** — flood risk +- **P0: Only 7 frontend spec files for ~138K lines TypeScript** — extreme coverage gap +- **P2: gRPC reflection enabled in production** — aids API enumeration + +### Top Test Ideas +14. Open 500 WebSocket connections, broadcast event, measure delivery latency (P1) +15. 
Send 1,000 webhooks/sec to single endpoint — does it queue, drop, or 429? (P1) +16. Open canvas in two browser tabs, edit both, confirm WebSocket sync without data loss (P1) + +--- + +## 5. PLATFORM -- What it DEPENDS ON + +### Infrastructure +- PostgreSQL 17.5, RabbitMQ 3.8.17, Ubuntu 22.04, Go 1.25.3, Node.js +- OpenTelemetry, Sentry, Chromium (E2E tests) + +### Key Risks +- **P1: Go 1.25 is bleeding edge** — potential compiler/stdlib instability +- **P1: RabbitMQ 3.8.17 is 5+ years old** with known CVEs +- **P2: No distributed cache** — in-process caching breaks multi-instance deployments +- **P2: WebSocket URL construction** may fail behind SSL-terminating proxy + +### Top Test Ideas +17. Deploy two instances behind load balancer — confirm usage limit cache consistency (P1) +18. Kill RabbitMQ during active execution — confirm graceful degradation without data loss (P0) +19. Access app through nginx SSL reverse proxy — confirm WebSocket upgrades to `wss:` (P2) + +--- + +## 6. OPERATIONS -- How it's USED + +### Startup Sequence +1. Validate 6 required env vars → 2. Create database → 3. Run migrations → 4. Start server + +### Key Risks +- **P0: `docker-entrypoint.sh` line 6** checks `$DB_PASSWORD` but prints "DB username not set" — wrong error message +- **P1: No migration version locking** — concurrent container starts cause migration conflicts +- **P1: 60+ environment variables** with no validation schema +- **P1: No HTTP health check endpoint** — gRPC has one but public API doesn't +- **P1: Installation admin promotion has no audit trail** + +### Top Test Ideas +20. Start with `DB_PASSWORD` unset — confirm error correctly identifies the variable (P0) +21. Start two containers simultaneously — confirm migrations don't conflict (P1) +22. Promote user to admin, perform actions, demote — confirm immediate revocation (P1) +23. Delete organization, confirm soft-delete cascade, test recovery within 30 days (P1) + +--- + +## 7. 
TIME -- WHEN Things Happen + +### Concurrency Mechanisms +- Semaphore-bounded goroutines (25 per worker) +- `SELECT FOR UPDATE SKIP LOCKED` for execution locking +- `sync.RWMutex` in registry and WebSocket hub +- RabbitMQ consumer concurrency + +### Key Risks +- **P0: WebSocket hub race condition** — `BroadcastToWorkflow` during client disconnect could panic +- **P0: NodeQueueWorker 1-second polling** creates unnecessary DB pressure under load +- **P1: No execution timeout** — hung HTTP calls leave executions in "started" forever +- **P1: Event retention worker may corrupt execution chains** if deleting during active execution +- **P1: No circuit breaker on integration calls** — down APIs exhaust semaphore slots +- **P1: RabbitMQ doesn't guarantee message ordering** — may cause incorrect execution sequences + +### Top Test Ideas +24. 5 workers processing same canvas events — confirm no double-execution or lost events (P0) +25. During broadcast to 100 WS clients, disconnect 50 simultaneously — confirm no panics (P0) +26. Configure HTTP component to never-responding server — confirm execution eventually times out (P0) +27. Trigger event retention on root event while child is still running — confirm safe handling (P1) +28. 
Send 100 sequential events rapidly — confirm processing preserves order (P1) + +--- + +## Exploratory Test Session Proposals + +| # | Session | Factor | Priority | +|---|---------|--------|----------| +| 1 | **Dependency Surgery** — Remove integration import, explore system behavior | Structure | P2 | +| 2 | **Migration Archaeology** — Verify 5 random migration up/down reversibility | Structure | P1 | +| 3 | **Canvas Complexity** — Build increasingly complex canvases until engine breaks | Function | P0 | +| 4 | **Integration Failure Modes** — Invalid credentials for top 5 integrations | Function | P1 | +| 5 | **Expression Injection** — Adversarial inputs via `{{...}}` template syntax | Function | P0 | +| 6 | **Data Boundary Walking** — Push every field to extremes | Data | P1 | +| 7 | **Secret Lifecycle** — Create/update/use/delete/re-create secrets | Data | P1 | +| 8 | **WebSocket Resilience** — Throttle to 3G, trigger rapid events | Interfaces | P1 | +| 9 | **API Enumeration** — Use gRPC reflection to enumerate and probe all methods | Interfaces | P2 | +| 10 | **Resource Starvation** — 256MB memory, pool size 1 | Platform | P1 | +| 11 | **Chaos Configuration** — Randomized env var combinations | Operations | P1 | +| 12 | **Upgrade Simulation** — Run old schema, insert data, migrate to current | Operations | P1 | +| 13 | **Thundering Herd** — 100 webhooks simultaneously | Time | P0 | +| 14 | **Long-Running Workflow** — 7-day wait component across restarts | Time | P1 | + +--- + +## Clarifying Questions + +1. Is there an ADR for the Canvas/Workflow naming migration? +2. What is the intended deployment topology (single vs multi-instance)? +3. What happens when a node execution hangs indefinitely? Is there a planned timeout? +4. Is the canvas linter enforced before publication or advisory only? +5. What is the recovery path for failed webhook provisioning (idempotency)? +6. What are the data retention defaults and limits per organization? +7. 
Is there a maximum canvas memory size per workflow? +8. Are WebSocket connections authorized at the organization level? +9. Is the OpenAPI spec autogenerated or manually maintained? +10. Why does `go.mod` specify Go 1.25? (not in standard release schedule) +11. Is there a plan to upgrade RabbitMQ from 3.8.17? +12. What is the backup and disaster recovery strategy? +13. How does the system handle clock skew between instances and database? +14. What is the expected maximum event throughput per canvas per second? + +--- + +## Automation Fitness + +| Type | Count | % | Rationale | +|------|-------|---|-----------| +| Unit | 19 | 30% | Config validation, error sanitizer, expression parsing, JWT | +| Integration | 26 | 41% | Worker behavior, DB locking, migration testing, API contracts | +| E2E | 10 | 16% | Canvas editor flows, approval flows, WebSocket real-time | +| Human Exploration | 8 | 13% | Complex failure modes, UX quality, adversarial discovery | + +--- +*Generated by AQE v3 Product Factors Assessor Agent (HTSM/SFDIPOT)* diff --git a/docs/qe-reports-March-28/06-test-coverage-analysis.md b/docs/qe-reports-March-28/06-test-coverage-analysis.md new file mode 100644 index 0000000000..518561b08b --- /dev/null +++ b/docs/qe-reports-March-28/06-test-coverage-analysis.md @@ -0,0 +1,228 @@ +# SuperPlane Test Suite & Coverage Analysis Report + +**Date**: 2026-03-28 +**Scope**: All test layers — Go backend, React frontend, Python agent, E2E +**Total Test Files**: 534 + +--- + +## Executive Summary + +SuperPlane has **strong backend test coverage** with 493 Go test files across `pkg/`, but a **critically thin frontend test layer** with only 7 spec files covering 716+ source files (~1% file-level coverage). The E2E suite is well-structured but carries flakiness risk from 148 `Sleep` calls. Significant gaps exist in security, accessibility, performance, contract, and visual regression testing. + +--- + +## 1. 
Test Inventory + +| Category | Framework | Count | Notes | +|---|---|---|---| +| Go Unit Tests | Go testing + testify | 493 | All in `pkg/` | +| Go E2E Tests | Go + Playwright-go | 25 | In `test/e2e/` | +| Frontend Unit Tests | Vitest 4.0.18 | 7 | In `web_src/src/` | +| Python Agent Tests | Pytest | 9 | In `agent/tests/` | +| Storybook Stories | Storybook 9 | 73 | Visual documentation (no assertions) | +| Consumer Tests | Go test helper | 1 | AMQP consumer | + +### All 7 Frontend Test Files + +1. `web_src/src/components/AutoCompleteInput/core.spec.ts` +2. `web_src/src/pages/workflowv2/autoLayout.spec.ts` +3. `web_src/src/pages/workflowv2/conflictResolverUtils.spec.ts` +4. `web_src/src/pages/workflowv2/mappers/http.spec.ts` +5. `web_src/src/utils/errors.spec.ts` +6. `web_src/src/utils/usageLimits.spec.ts` +7. `web_src/src/utils/withOrganizationHeader.spec.ts` + +--- + +## 2. Test Quality Assessment + +### Naming Conventions +- **Frontend (GOOD)**: Descriptive `it("does X when Y")` naming consistently +- **Backend Go (MIXED)**: Some double-underscore style (`Test__AESGCMEncryptor`), some conventional +- **E2E (GOOD)**: BDD-style with step objects + +### AAA Pattern Adherence +- **Frontend (EXCELLENT)**: All 7 files follow clear Arrange-Act-Assert +- **Backend (GOOD)**: Clear setup, action, assertion phases +- **E2E (GOOD)**: Step-based patterns mapping to Given-When-Then + +### Assertion Quality +- **Frontend (EXCELLENT)**: Specific matchers (`toContain`, `toEqual`, `toMatchObject`, `not.toThrow`) +- **Backend (GOOD)**: Consistent testify usage (`require.NoError`, `assert.Equal`) +- **E2E (GOOD)**: Custom `session.AssertVisible`, `session.AssertText` + +### Flakiness Indicators +- **E2E (HIGH RISK)**: 148 `Sleep` calls. `--rerun-fails=3` confirms flakiness is operational. +- **Frontend (LOW RISK)**: No sleeps or timeouts +- **Backend (LOW RISK)**: No time-dependent patterns + +--- + +## 3. 
Coverage Gaps + +### 3.1 Frontend: CRITICAL (~1% File Coverage) + +**Pages with ZERO tests (all 8 page directories untested):** + +| Page | Files | Risk | +|---|---|---| +| `pages/admin/` | 11 files | HIGH (security-sensitive) | +| `pages/auth/` | 6 files | CRITICAL (authentication flow) | +| `pages/canvas/` | 4 files | HIGH (core product) | +| `pages/home/` | 4 files | MEDIUM | +| `pages/organization/settings/` | 19 files | HIGH | +| `pages/workflowv2/` | ~20 non-tested files | HIGH (core editor) | + +**Other untested frontend layers:** + +| Layer | Source Files | Tests | Coverage | +|---|---|---|---| +| Components | 33 directories | 1 test | 3% | +| UI Layer | 146 files | 0 tests | 0% | +| Hooks | 16 files | 0 tests | 0% | +| Stores | 1 file | 0 tests | 0% | +| Contexts | 2 files | 0 tests | 0% | +| Lib | 5 files | 0 tests | 0% | +| Utils | 14 files | 3 tested | 21% | + +### 3.2 Backend: Packages Without Tests + +| Package | Risk | +|---|---| +| `pkg/core` | HIGH (core business logic) | +| `pkg/database` | HIGH (data integrity) | +| `pkg/config` | HIGH (misconfiguration cascades) | +| `pkg/secrets` | CRITICAL (security) | +| `pkg/server` | MEDIUM | +| `pkg/retry` | MEDIUM | +| `pkg/web`, `pkg/widgets`, `pkg/templates` | LOW-MEDIUM | + +### 3.3 gRPC Actions: 9 of ~17 Packages Have No Tests + +Missing: `agents`, `blueprints`, `components`, `integrations`, `me`, `messages`, `serviceaccounts`, `triggers`, `widgets` + +### 3.4 Integration Coverage: 42/43 Tested (Excellent) + +Only `hetzner` (0/8 files) lacks tests. This is a strength. + +--- + +## 4. 
Test Pyramid Health + +| Layer | Count | Percentage | Ideal | +|---|---|---|---| +| Unit (Go + Frontend + Python) | 509 | 89.4% | 70% | +| Integration (gRPC actions) | ~35 | 6.1% | 20% | +| E2E (Playwright-Go) | 25 | 4.4% | 10% | + +**Assessment:** Pyramid shape is correct (not inverted), but: +- Over-indexed on unit tests at expense of integration layer +- Integration layer too thin (gRPC action tests underrepresented) +- **Frontend pyramid collapses entirely** — only layer is Storybook + E2E + +--- + +## 5. Test Infrastructure + +### CI/CD +- **GitHub Actions**: Only PR title validation and release notifications. **No test execution in GitHub Actions.** +- **Semaphore CI** (inferred): Primary CI with sharded E2E execution +- **gotestsum**: JUnit XML reporting, `--rerun-fails=3` +- **Docker-based**: All tests via `docker-compose` with test database + +### Parallelization +- **Backend**: `-p 1` (serial) — likely due to shared DB state +- **E2E**: Shard-based parallelism across CI workers (well-implemented) +- **Frontend**: Vitest native (only 7 files, not a concern) + +### Test Data Management +- **E2E**: Clean-slate per test (`resetDatabase()` + `setupUserAndOrganization()`) +- **Backend**: Real DB (`superplane_test`), VCR cassettes for HTTP recording +- **Fixtures**: Minimal (1 file), data created programmatically + +### Mock/Stub Patterns +- **Backend**: VCR (`go-vcr`) for HTTP API mocking +- **Frontend**: Minimal mocking configured +- **E2E**: Fully integrated (real backend + real Vite dev server) + +--- + +## 6. 
E2E Test Quality + +### Step Object Pattern: WELL IMPLEMENTED +- `TestLoginPageSteps`, `CanvasPageSteps`, `CanvasSteps`, `TestSession` — mature reusable patterns + +### Browser Coverage +- **Chromium only** — no Firefox or WebKit +- **Desktop only** (2560x1440) — no mobile viewport testing + +### Covered User Journeys +Authentication, canvas CRUD, organization management, admin ops, versioning, change requests + +### Missing User Journeys +Integration setup, canvas execution end-to-end, custom component builder, onboarding, multi-org switching, import/export beyond basic YAML + +--- + +## 7. Missing Test Categories + +| Category | Status | Priority | +|---|---|---| +| Security tests | ABSENT | P0 | +| Performance tests / benchmarks | ABSENT | P1 | +| Accessibility tests (axe-core) | ABSENT | P2 | +| Contract tests (Pact/OpenAPI) | ABSENT | P2 | +| Visual regression tests | ABSENT | P2 | +| Mutation testing | ABSENT | P3 | + +--- + +## 8. Prioritized Recommendations + +### P0 — Critical + +1. **Frontend unit test coverage**: Prioritize `pages/auth/`, `lib/expressionParser.ts`, `lib/exprEvaluator.ts`, custom hooks (`useCanvasData`, `useCanvasWebsocket`), `utils/canvasLinter.ts` +2. **Security test gaps**: Add tests for `pkg/secrets`, `pkg/authorization` boundary cases, JWT edge cases +3. **Backend foundational packages**: Test `pkg/core`, `pkg/database`, `pkg/config` + +### P1 — High Priority + +4. **Reduce E2E flakiness**: Replace 148 `Sleep` calls with proper waits (follow `WaitForCanvasSaveStatusSaved` pattern) +5. **gRPC action test coverage**: 9 untested action packages +6. **Integration test layer**: Service-level tests between gRPC, business logic, and database +7. **Frontend component testing**: Start with most interactive components + +### P2 — Medium Priority + +8. Cross-browser E2E (Firefox + WebKit) +9. API contract testing (OpenAPI validation) +10. Accessibility testing (axe-core in Storybook/Vitest) +11. 
Visual regression testing (leverage 73 Storybook stories) +12. Go benchmarks for critical paths + +### P3 — Strategic + +13. Mutation testing (Stryker for TS) +14. Mobile viewport E2E tests +15. Test data factories +16. CI/CD test pipeline in GitHub Actions + +--- + +## Key Metrics + +| Metric | Value | Assessment | +|---|---|---| +| Go test file:source ratio | 493:1241 (40%) | Good | +| Frontend test:source ratio | 7:716 (~1%) | **CRITICAL** | +| Backend packages without tests | 12/35 (34%) | Needs work | +| All frontend pages untested | 8/8 (100%) | **CRITICAL** | +| All frontend hooks untested | 16/16 (100%) | **CRITICAL** | +| Entire UI layer untested | 146/146 (100%) | **CRITICAL** | +| E2E Sleep calls | 148 | High flakiness | +| Browser coverage | Chromium only | Single browser | +| Missing test categories | 6 (security, perf, a11y, contract, visual, mutation) | Significant gaps | + +--- +*Generated by AQE v3 Test Architect Agent* diff --git a/docs/qe-reports-March-28/07-mcp-fleet-results.md b/docs/qe-reports-March-28/07-mcp-fleet-results.md new file mode 100644 index 0000000000..fe8a8d2129 --- /dev/null +++ b/docs/qe-reports-March-28/07-mcp-fleet-results.md @@ -0,0 +1,111 @@ +# MCP Fleet Raw Results +**Date:** March 28, 2026 +**Fleet ID:** fleet-bf88d6ec +**Topology:** Hierarchical | Max Agents: 15 + +--- + +## 1. Quality Assessment (quality_assess) + +**Status:** Completed | **Duration:** 49.6s | **Gate:** FAILED + +| Metric | Value | Threshold | Status | +|--------|-------|-----------|--------| +| Overall Score | 49.0/100 | 70 | FAIL | +| Coverage | -1 (not measured) | 80% | N/A | +| Complexity | 30.95 | < 15 | FAIL | +| Maintainability | 57.46 | > 65 | FAIL | +| Security | 85.0 | > 80 | PASS | + +**Recommendations:** +- [CRITICAL] Reduce Code Complexity: Average cyclomatic complexity is 30.95 +- [CRITICAL] Overall Quality Improvement Needed: Multiple areas need attention + +--- + +## 2. 
Security Scan — Backend (pkg/)

**Status:** Completed | **Duration:** 2.2s | **Files Scanned:** 1,734

| Severity | Count |
|----------|-------|
| Critical | 172 |
| High | 2 |
| Medium | 0 |
| Low | 0 |
| **Total** | **174** |

### False Positive Analysis

Manual verification of top flagged items revealed that the **majority are false positives**:

| File | Line | Finding | Verdict |
|------|------|---------|---------|
| `authentication/context.go` | 10 | `TokenScopesMetadataKey = "x-token-scopes"` | FALSE POSITIVE — metadata key name |
| `integrations/aws/aws.go` | 37 | `APIKeyHeaderName = "X-Superplane-Secret"` | FALSE POSITIVE — header name constant |
| `integrations/aws/aws.go` | 38 | `EventBridgeConnectionSecretName = "eventbridge.connection.secret"` | FALSE POSITIVE — key name |
| `cli/commands/secrets/common.go` | 18 | `SecretKind = "Secret"` | FALSE POSITIVE — kind identifier |
| `authentication/authentication.go` | 168 | `AccessToken: "dev-token-" + provider` | **TRUE POSITIVE** — mock dev token |

**Estimated True Positive Rate:** ~5-10% — the remaining ~90-95% are false positives (mostly struct field names, constant identifiers)

### Genuine Concerns
1. Dev mock token in `authentication.go:168` — verify dev-mode guard
2. Need to verify `impersonation/session.go` handling

---

## 3. 
Coverage Analysis — Frontend (web_src/src/) + +**Status:** Completed | **Files Analyzed:** 790 + +| Metric | Value | +|--------|-------| +| Avg Line Coverage | 79.1% | +| Avg Branch Coverage | 99.2% | +| Avg Function Coverage | 20.8% | +| Files with 0% Function Coverage | 489 (61.9%) | +| Total Coverage Gaps | 727 | + +### Critical Coverage Gaps (sample) + +| File | Risk | Reason | +|------|------|--------| +| `pages/auth/Login.tsx` | 0.5 | Missing test case | +| `pages/auth/OwnerSetup.tsx` | 0.5 | Missing test case | +| `pages/organization/settings/CreateRolePage.tsx` | 0.5 | Missing test case | +| `hooks/useCanvasData.ts` | 0.5 | Missing test case | +| `hooks/useCanvasWebsocket.ts` | 0.5 | Missing test case | +| `components/AutoCompleteSelect/index.tsx` | 0.5 | Missing test case | +| `components/CreateCustomComponentModal/index.tsx` | 0.5 | Missing test case | + +--- + +## 4. Defect Prediction — Backend (pkg/) + +**Status:** Completed | **Duration:** 2.8s + +- **Predicted Defects:** 0 +- **Risk Score:** 0 +- **Assessment:** No files exceeded the defect probability threshold — code looks healthy + +--- + +## 5. Code Index + +| Target | Files Indexed | Symbols | Relations | +|--------|--------------|---------|-----------| +| `web_src/src/` | 0 | 0 | 0 | +| `pkg/` | 1,734 | 0 | 0 | + +--- + +## 6. 
Task Orchestration + +**Task ID:** task_05618b3c +**Strategy:** Parallel +**Routing:** Tier 2 (Sonnet) — Complexity 50/100 + +--- + +*Raw results stored in `.agentic-qe/results/`* diff --git a/pkg/grpc/actions/canvases/lint_canvas.go b/pkg/grpc/actions/canvases/lint_canvas.go new file mode 100644 index 0000000000..75004717cf --- /dev/null +++ b/pkg/grpc/actions/canvases/lint_canvas.go @@ -0,0 +1,94 @@ +package canvases + +import ( + "encoding/json" + "errors" + "net/http" + + "github.com/google/uuid" + "github.com/gorilla/mux" + log "github.com/sirupsen/logrus" + "github.com/superplanehq/superplane/pkg/database" + "github.com/superplanehq/superplane/pkg/linter" + "github.com/superplanehq/superplane/pkg/models" + "github.com/superplanehq/superplane/pkg/registry" + "gorm.io/gorm" +) + +// jsonError writes a JSON error response with the given status code. +func jsonError(w http.ResponseWriter, msg string, code int) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(code) + _ = json.NewEncoder(w).Encode(map[string]string{"error": msg}) +} + +// LintCanvasHandler returns an http.HandlerFunc that lints a canvas by ID. +// It reads the canvas spec from the live version and runs the linter. +// +// Route: POST /api/v1/canvases/{canvasId}/lint +func LintCanvasHandler(reg *registry.Registry) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Extract organization ID from header (set by auth middleware). + orgID := r.Header.Get("X-Organization-Id") + if orgID == "" { + jsonError(w, "missing organization id", http.StatusUnauthorized) + return + } + + // Extract canvas ID from gorilla/mux route variables. 
+ canvasID := mux.Vars(r)["canvasId"] + if canvasID == "" { + jsonError(w, "missing canvas id", http.StatusBadRequest) + return + } + + orgUUID, err := uuid.Parse(orgID) + if err != nil { + jsonError(w, "invalid organization id", http.StatusBadRequest) + return + } + + canvasUUID, err := uuid.Parse(canvasID) + if err != nil { + jsonError(w, "invalid canvas id", http.StatusBadRequest) + return + } + + // Load the canvas. + canvas, err := models.FindCanvas(orgUUID, canvasUUID) + if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + jsonError(w, "canvas not found", http.StatusNotFound) + return + } + log.WithError(err).Error("failed to find canvas") + jsonError(w, "internal error", http.StatusInternalServerError) + return + } + + // Load the live version. + version, err := models.FindLiveCanvasVersionByCanvasInTransaction(database.Conn(), canvas) + if err != nil { + log.WithError(err).Error("failed to find live canvas version") + jsonError(w, "failed to load canvas version", http.StatusInternalServerError) + return + } + + // Run the linter. + nodes := []models.Node(version.Nodes) + edges := []models.Edge(version.Edges) + result := linter.LintCanvas(nodes, edges, reg) + + // Return JSON response. 
+ w.Header().Set("Content-Type", "application/json") + if result.Status == "fail" { + w.WriteHeader(http.StatusUnprocessableEntity) + } else { + w.WriteHeader(http.StatusOK) + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + log.WithError(err).Error("failed to encode lint result") + } + } +} diff --git a/pkg/grpc/actions/canvases/update_canvas_version.go b/pkg/grpc/actions/canvases/update_canvas_version.go index ef9597a2d7..23f44f07c1 100644 --- a/pkg/grpc/actions/canvases/update_canvas_version.go +++ b/pkg/grpc/actions/canvases/update_canvas_version.go @@ -12,6 +12,7 @@ import ( "github.com/superplanehq/superplane/pkg/crypto" "github.com/superplanehq/superplane/pkg/database" "github.com/superplanehq/superplane/pkg/grpc/actions/messages" + "github.com/superplanehq/superplane/pkg/linter" "github.com/superplanehq/superplane/pkg/models" pb "github.com/superplanehq/superplane/pkg/protos/canvases" usagepb "github.com/superplanehq/superplane/pkg/protos/usage" @@ -90,6 +91,20 @@ func UpdateCanvasVersionWithUsage( return nil, err } + // Quality gate: lint the canvas after layout and log results. + // Runs in warn-only mode — does not block saves, so users can always + // save their work. Issues are surfaced via the lint badge and API. 
+ lintResult := linter.LintCanvas(nodes, edges, registry) + if lintResult.Summary.ErrorCount > 0 || lintResult.Summary.WarningCount > 0 { + log.WithFields(log.Fields{ + "canvas_id": canvasID, + "quality_score": lintResult.QualityScore, + "quality_grade": string(lintResult.QualityGrade), + "error_count": lintResult.Summary.ErrorCount, + "warning_count": lintResult.Summary.WarningCount, + }).Warn("Canvas has quality issues") + } + expandedNodes, err := expandNodes(organizationID, nodes) if err != nil { return nil, err diff --git a/pkg/linter/linter.go b/pkg/linter/linter.go new file mode 100644 index 0000000000..1fa195a2dc --- /dev/null +++ b/pkg/linter/linter.go @@ -0,0 +1,714 @@ +package linter + +import ( + "fmt" + "regexp" + "strings" + + "github.com/superplanehq/superplane/pkg/models" + "github.com/superplanehq/superplane/pkg/registry" +) + +// Severity indicates how critical a lint issue is. +type Severity string + +const ( + SeverityError Severity = "error" + SeverityWarning Severity = "warning" + SeverityInfo Severity = "info" +) + +// LintIssue represents a single problem detected during linting. +type LintIssue struct { + Severity Severity `json:"severity"` + Rule string `json:"rule"` + NodeID string `json:"nodeId"` + NodeName string `json:"nodeName"` + Message string `json:"message"` +} + +// QualityGrade represents an A-F quality rating. +type QualityGrade string + +const ( + GradeA QualityGrade = "A" + GradeB QualityGrade = "B" + GradeC QualityGrade = "C" + GradeD QualityGrade = "D" + GradeF QualityGrade = "F" +) + +// LintSummary provides aggregate counts of the lint results. +type LintSummary struct { + TotalNodes int `json:"totalNodes"` + TotalEdges int `json:"totalEdges"` + ErrorCount int `json:"errorCount"` + WarningCount int `json:"warningCount"` + InfoCount int `json:"infoCount"` +} + +// LintResult is the complete output of running the linter on a canvas. 
+type LintResult struct { + Status string `json:"status"` // "pass" or "fail" + Errors []LintIssue `json:"errors"` + Warnings []LintIssue `json:"warnings"` + Info []LintIssue `json:"info"` + Summary LintSummary `json:"summary"` + QualityScore int `json:"qualityScore"` // 0-100 + QualityGrade QualityGrade `json:"qualityGrade"` // A-F +} + +// computeQualityScore returns a score from 0-100 and a letter grade. +// Each error deducts 15 points (max 60 total), each warning deducts 5 (max 30), +// each info deducts 1 (max 10). This prevents scores from bottoming out too quickly. +func computeQualityScore(errors, warnings, info int) (int, QualityGrade) { + errorPenalty := errors * 15 + if errorPenalty > 60 { + errorPenalty = 60 + } + warningPenalty := warnings * 5 + if warningPenalty > 30 { + warningPenalty = 30 + } + infoPenalty := info * 1 + if infoPenalty > 10 { + infoPenalty = 10 + } + score := 100 - errorPenalty - warningPenalty - infoPenalty + if score < 0 { + score = 0 + } + + var grade QualityGrade + switch { + case score >= 90: + grade = GradeA + case score >= 75: + grade = GradeB + case score >= 60: + grade = GradeC + case score >= 40: + grade = GradeD + default: + grade = GradeF + } + + return score, grade +} + +// terminalComponents are components that naturally end a workflow and +// should not be flagged as dead-ends. 
+var terminalComponents = map[string]bool{ + "approval": true, + "slack.sendTextMessage": true, + "slack.waitForButtonClick": true, + "github.createIssue": true, + "github.createIssueComment": true, + "github.createRelease": true, + "github.updateIssue": true, + "github.publishCommitStatus": true, + "github.addReaction": true, + "pagerduty.createIncident": true, + "pagerduty.resolveIncident": true, + "pagerduty.escalateIncident": true, + "pagerduty.annotateIncident": true, + "pagerduty.acknowledgeIncident": true, +} + +// destructiveComponents are components that perform irreversible or +// high-impact actions and should require an upstream approval gate. +var destructiveComponents = map[string]bool{ + "pagerduty.resolveIncident": true, + "pagerduty.escalateIncident": true, + "github.deleteRelease": true, + "github.createRelease": true, +} + +// nodeRefDoubleQuotePattern matches $["Node Name"] references in expressions. +var nodeRefDoubleQuotePattern = regexp.MustCompile(`\$\["([^"]+)"\]`) + +// nodeRefSingleQuotePattern matches $['Node Name'] references in expressions. +var nodeRefSingleQuotePattern = regexp.MustCompile(`\$\['([^']+)'\]`) + +// LintCanvas performs static analysis on a canvas defined by nodes and edges. +// The registry parameter is accepted for future use and may be nil. +func LintCanvas(nodes []models.Node, edges []models.Edge, _ *registry.Registry) *LintResult { + result := &LintResult{ + Errors: []LintIssue{}, + Warnings: []LintIssue{}, + Info: []LintIssue{}, + } + + // Build lookup maps. 
+ nodeByID := make(map[string]models.Node, len(nodes)) + nodeByName := make(map[string]bool, len(nodes)) + outgoing := make(map[string][]models.Edge) + incoming := make(map[string][]models.Edge) + triggers := make([]models.Node, 0) + widgets := make(map[string]bool) + + for _, n := range nodes { + nodeByID[n.ID] = n + nodeByName[n.Name] = true + + if n.Type == "TYPE_TRIGGER" { + triggers = append(triggers, n) + } + if n.Type == "TYPE_WIDGET" { + widgets[n.ID] = true + } + } + + for _, e := range edges { + outgoing[e.SourceID] = append(outgoing[e.SourceID], e) + incoming[e.TargetID] = append(incoming[e.TargetID], e) + } + + // Run all rule checkers. + checkDuplicateNodes(nodes, result) + checkEdgeValidity(edges, nodeByID, widgets, result) + checkCycles(nodes, edges, widgets, result) + checkOrphanNodes(nodes, triggers, outgoing, widgets, result) + checkDeadEnds(nodes, outgoing, widgets, result) + checkMissingApprovalGate(nodes, incoming, nodeByID, widgets, result) + checkMissingRequiredConfig(nodes, incoming, result) + checkExpressionSyntax(nodes, nodeByName, widgets, result) + checkUnreachableBranches(nodes, outgoing, result) + + // Compute summary. + result.Summary = LintSummary{ + TotalNodes: len(nodes), + TotalEdges: len(edges), + ErrorCount: len(result.Errors), + WarningCount: len(result.Warnings), + InfoCount: len(result.Info), + } + + if len(result.Errors) > 0 { + result.Status = "fail" + } else { + result.Status = "pass" + } + + result.QualityScore, result.QualityGrade = computeQualityScore( + len(result.Errors), len(result.Warnings), len(result.Info), + ) + + return result +} + +// checkDuplicateNodes detects duplicate node IDs and duplicate node names. 
+func checkDuplicateNodes(nodes []models.Node, result *LintResult) { + seenIDs := make(map[string]bool, len(nodes)) + seenNames := make(map[string]bool, len(nodes)) + + for _, n := range nodes { + if seenIDs[n.ID] { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "duplicate-node-id", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Duplicate node ID %q", n.ID), + }) + } + seenIDs[n.ID] = true + + if n.Type == "TYPE_WIDGET" { + continue + } + if seenNames[n.Name] { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "duplicate-node-name", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Duplicate node name %q — expression references may be ambiguous", n.Name), + }) + } + seenNames[n.Name] = true + } +} + +// checkEdgeValidity validates edges for dangling references, self-loops, +// duplicate edges, and edges involving widget nodes. +func checkEdgeValidity(edges []models.Edge, nodeByID map[string]models.Node, widgets map[string]bool, result *LintResult) { + type edgeKey struct{ src, tgt, ch string } + seen := make(map[edgeKey]bool, len(edges)) + + for i, e := range edges { + // Dangling source/target. + if _, ok := nodeByID[e.SourceID]; !ok { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-edge", + NodeID: e.SourceID, + Message: fmt.Sprintf("Edge %d references nonexistent source node %q", i, e.SourceID), + }) + } + if _, ok := nodeByID[e.TargetID]; !ok { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-edge", + NodeID: e.TargetID, + Message: fmt.Sprintf("Edge %d references nonexistent target node %q", i, e.TargetID), + }) + } + + // Self-loop. 
+ if e.SourceID == e.TargetID { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-edge", + NodeID: e.SourceID, + NodeName: nodeByID[e.SourceID].Name, + Message: fmt.Sprintf("Edge %d is a self-loop on node %q", i, e.SourceID), + }) + } + + // Duplicate edge. + key := edgeKey{e.SourceID, e.TargetID, e.Channel} + if seen[key] { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "duplicate-edge", + NodeID: e.SourceID, + NodeName: nodeByID[e.SourceID].Name, + Message: fmt.Sprintf("Duplicate edge from %q to %q on channel %q", e.SourceID, e.TargetID, e.Channel), + }) + } + seen[key] = true + + // Widget as edge endpoint. + if widgets[e.SourceID] { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-edge", + NodeID: e.SourceID, + NodeName: nodeByID[e.SourceID].Name, + Message: fmt.Sprintf("Edge %d uses widget node %q as source", i, e.SourceID), + }) + } + if widgets[e.TargetID] { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-edge", + NodeID: e.TargetID, + NodeName: nodeByID[e.TargetID].Name, + Message: fmt.Sprintf("Edge %d uses widget node %q as target", i, e.TargetID), + }) + } + } +} + +// checkCycles detects cycles in the non-widget node graph using Kahn's algorithm. +func checkCycles(nodes []models.Node, edges []models.Edge, widgets map[string]bool, result *LintResult) { + // Build adjacency for non-widget nodes only. + inDegree := make(map[string]int) + adj := make(map[string][]string) + + for _, n := range nodes { + if widgets[n.ID] { + continue + } + inDegree[n.ID] = 0 + } + + for _, e := range edges { + if widgets[e.SourceID] || widgets[e.TargetID] { + continue + } + adj[e.SourceID] = append(adj[e.SourceID], e.TargetID) + inDegree[e.TargetID]++ + } + + // Kahn's: start from nodes with in-degree 0. 
+ queue := make([]string, 0) + for id, deg := range inDegree { + if deg == 0 { + queue = append(queue, id) + } + } + + visited := 0 + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + visited++ + + for _, next := range adj[current] { + inDegree[next]-- + if inDegree[next] == 0 { + queue = append(queue, next) + } + } + } + + totalNonWidget := 0 + for _, n := range nodes { + if !widgets[n.ID] { + totalNonWidget++ + } + } + + if visited < totalNonWidget { + // Find nodes that are part of cycles (those with remaining in-degree > 0). + var cycleNodes []string + for id, deg := range inDegree { + if deg > 0 { + cycleNodes = append(cycleNodes, id) + } + } + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "cycle-detected", + Message: fmt.Sprintf("Cycle detected involving %d node(s): %v", len(cycleNodes), cycleNodes), + }) + } +} + +// checkOrphanNodes finds non-widget nodes that are not reachable from any trigger via BFS. +func checkOrphanNodes( + nodes []models.Node, + triggers []models.Node, + outgoing map[string][]models.Edge, + widgets map[string]bool, + result *LintResult, +) { + reachable := make(map[string]bool) + + // BFS from all trigger nodes. 
+ queue := make([]string, 0, len(triggers)) + for _, t := range triggers { + queue = append(queue, t.ID) + reachable[t.ID] = true + } + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, e := range outgoing[current] { + if !reachable[e.TargetID] { + reachable[e.TargetID] = true + queue = append(queue, e.TargetID) + } + } + } + + for _, n := range nodes { + if widgets[n.ID] { + continue + } + if !reachable[n.ID] { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "orphan-node", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q is not reachable from any trigger", n.Name), + }) + } + } +} + +// checkDeadEnds finds non-widget, non-trigger nodes with no outgoing edges +// that are not known terminal components. +func checkDeadEnds( + nodes []models.Node, + outgoing map[string][]models.Edge, + widgets map[string]bool, + result *LintResult, +) { + for _, n := range nodes { + if widgets[n.ID] || n.Type == "TYPE_TRIGGER" { + continue + } + + if len(outgoing[n.ID]) > 0 { + continue + } + + compName := getComponentName(n) + if terminalComponents[compName] { + continue + } + + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "dead-end", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q has no outgoing edges and is not a terminal component", n.Name), + }) + } +} + +// checkMissingApprovalGate verifies that every destructive component has +// an upstream approval node reachable by walking backwards through edges. 
+func checkMissingApprovalGate( + nodes []models.Node, + incoming map[string][]models.Edge, + nodeByID map[string]models.Node, + widgets map[string]bool, + result *LintResult, +) { + for _, n := range nodes { + if widgets[n.ID] { + continue + } + + compName := getComponentName(n) + if !destructiveComponents[compName] { + continue + } + + if !hasUpstreamApproval(n.ID, incoming, nodeByID) { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "missing-approval-gate", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Destructive action %q in node %q has no upstream approval gate", compName, n.Name), + }) + } + } +} + +// hasUpstreamApproval does a reverse BFS from the given node looking for +// an approval component in its ancestors. +func hasUpstreamApproval( + startID string, + incoming map[string][]models.Edge, + nodeByID map[string]models.Node, +) bool { + visited := make(map[string]bool) + queue := []string{startID} + visited[startID] = true + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, e := range incoming[current] { + if visited[e.SourceID] { + continue + } + visited[e.SourceID] = true + + source, ok := nodeByID[e.SourceID] + if !ok { + continue + } + + if getComponentName(source) == "approval" { + return true + } + + queue = append(queue, e.SourceID) + } + } + + return false +} + +// checkMissingRequiredConfig checks specific component types for +// required or recommended configuration fields. 
+func checkMissingRequiredConfig( + nodes []models.Node, + incoming map[string][]models.Edge, + result *LintResult, +) { + for _, n := range nodes { + config := n.Configuration + if config == nil { + config = map[string]any{} + } + + compName := getComponentName(n) + + switch compName { + case "claude.textPrompt": + prompt, _ := config["prompt"].(string) + if strings.TrimSpace(prompt) == "" { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "missing-required-config", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q (claude.textPrompt) is missing required \"prompt\" configuration", n.Name), + }) + } + + case "slack.sendTextMessage": + if _, ok := config["channel"]; !ok { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "missing-required-config", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q (slack.sendTextMessage) is missing \"channel\" configuration", n.Name), + }) + } + + case "merge": + incomingCount := len(incoming[n.ID]) + if incomingCount < 2 { + result.Info = append(result.Info, LintIssue{ + Severity: SeverityInfo, + Rule: "missing-required-config", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q (merge) has %d incoming edge(s); merge typically expects 2 or more", n.Name, incomingCount), + }) + } + + case "filter": + expr, _ := config["expression"].(string) + if strings.TrimSpace(expr) == "" { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "missing-required-config", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q (filter) is missing required \"expression\" configuration", n.Name), + }) + } + + case "http": + if _, ok := config["url"]; !ok { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "missing-required-config", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q (http) is missing \"url\" 
configuration", n.Name), + }) + } + } + } +} + +// checkExpressionSyntax scans all string values in every node's Configuration +// for unbalanced {{ }} delimiters and invalid $["Node Name"] references. +func checkExpressionSyntax( + nodes []models.Node, + nodeByName map[string]bool, + widgets map[string]bool, + result *LintResult, +) { + for _, n := range nodes { + if widgets[n.ID] { + continue + } + + config := n.Configuration + if config == nil { + continue + } + + for _, val := range collectStringValues(config) { + // Check balanced {{ }} delimiters. + openCount := strings.Count(val, "{{") + closeCount := strings.Count(val, "}}") + if openCount != closeCount { + result.Errors = append(result.Errors, LintIssue{ + Severity: SeverityError, + Rule: "invalid-expression", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q has unbalanced expression delimiters: %d opening '{{' vs %d closing '}}'", n.Name, openCount, closeCount), + }) + } + + // Check $["Node Name"] references point to real nodes. + // Use separate patterns for double-quoted and single-quoted + // to correctly handle node names containing the other quote type. + for _, pat := range []*regexp.Regexp{nodeRefDoubleQuotePattern, nodeRefSingleQuotePattern} { + matches := pat.FindAllStringSubmatch(val, -1) + for _, match := range matches { + refName := match[1] + if !nodeByName[refName] { + result.Warnings = append(result.Warnings, LintIssue{ + Severity: SeverityWarning, + Rule: "invalid-expression", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Node %q references unknown node %q", n.Name, refName), + }) + } + } + } + } + } +} + +// collectStringValues recursively extracts all string values from a map. 
+func collectStringValues(m map[string]any) []string { + if m == nil { + return nil + } + var result []string + for _, v := range m { + switch val := v.(type) { + case string: + result = append(result, val) + case map[string]any: + result = append(result, collectStringValues(val)...) + case []any: + for _, item := range val { + if s, ok := item.(string); ok { + result = append(result, s) + } + if sub, ok := item.(map[string]any); ok { + result = append(result, collectStringValues(sub)...) + } + } + } + } + return result +} + +// checkUnreachableBranches checks that filter components have at least one +// "default" channel outgoing edge, ensuring the matching path has somewhere to go. +func checkUnreachableBranches( + nodes []models.Node, + outgoing map[string][]models.Edge, + result *LintResult, +) { + for _, n := range nodes { + compName := getComponentName(n) + if compName != "filter" { + continue + } + + hasDefault := false + for _, e := range outgoing[n.ID] { + if e.Channel == "default" { + hasDefault = true + break + } + } + + if !hasDefault { + result.Info = append(result.Info, LintIssue{ + Severity: SeverityInfo, + Rule: "unreachable-branch", + NodeID: n.ID, + NodeName: n.Name, + Message: fmt.Sprintf("Filter node %q has no \"default\" channel outgoing edge; matched events have nowhere to go", n.Name), + }) + } + } +} + +// getComponentName returns the component or trigger name for a node. 
+func getComponentName(node models.Node) string { + if node.Ref.Component != nil { + return node.Ref.Component.Name + } + if node.Ref.Trigger != nil { + return node.Ref.Trigger.Name + } + return "" +} diff --git a/pkg/linter/linter_test.go b/pkg/linter/linter_test.go new file mode 100644 index 0000000000..84f05a4697 --- /dev/null +++ b/pkg/linter/linter_test.go @@ -0,0 +1,948 @@ +package linter + +import ( + "os" + "testing" + + "github.com/ghodss/yaml" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/superplanehq/superplane/pkg/models" +) + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +func triggerNode(id, name string) models.Node { + return models.Node{ + ID: id, + Name: name, + Type: "TYPE_TRIGGER", + Ref: models.NodeRef{Trigger: &models.TriggerRef{Name: "pagerduty.onIncident"}}, + Configuration: map[string]any{}, + } +} + +func componentNode(id, name, componentName string, config map[string]any) models.Node { + return models.Node{ + ID: id, + Name: name, + Type: "TYPE_COMPONENT", + Ref: models.NodeRef{Component: &models.ComponentRef{Name: componentName}}, + Configuration: config, + } +} + +func widgetNode(id, name string) models.Node { + return models.Node{ + ID: id, + Name: name, + Type: "TYPE_WIDGET", + Ref: models.NodeRef{Widget: &models.WidgetRef{Name: "annotation"}}, + Configuration: map[string]any{}, + } +} + +func edge(src, tgt, channel string) models.Edge { + return models.Edge{SourceID: src, TargetID: tgt, Channel: channel} +} + +// countIssuesByRule returns the number of issues with the given rule name. 
+func countIssuesByRule(issues []LintIssue, rule string) int { + count := 0 + for _, i := range issues { + if i.Rule == rule { + count++ + } + } + return count +} + +// --------------------------------------------------------------------------- +// Original tests (existing rules) +// --------------------------------------------------------------------------- + +func TestLintCanvas_HealthyCanvas(t *testing.T) { + // Full valid flow: trigger -> filter -> 3 parallel -> merge -> claude -> slack -> approval + nodes := []models.Node{ + triggerNode("t1", "Listen for incidents"), + componentNode("f1", "Is it P1", "filter", map[string]any{ + "expression": `$["Listen for incidents"].data.priority == "P1"`, + }), + componentNode("c1", "Get deploy", "github.getRelease", nil), + componentNode("c2", "Get metrics", "http", map[string]any{"url": "https://api.example.com"}), + componentNode("c3", "Get logs", "pagerduty.listLogEntries", nil), + componentNode("m1", "Wait for all", "merge", nil), + componentNode("ai", "AI Assessment", "claude.textPrompt", map[string]any{ + "prompt": "Analyze the incident: {{ $[\"Listen for incidents\"].data.title }}", + }), + componentNode("sl", "Notify Slack", "slack.sendTextMessage", map[string]any{ + "channel": "#incidents", + }), + componentNode("ap", "Approve", "approval", nil), + } + edges := []models.Edge{ + edge("t1", "f1", "default"), + edge("f1", "c1", "default"), + edge("f1", "c2", "default"), + edge("f1", "c3", "default"), + edge("c1", "m1", "default"), + edge("c2", "m1", "default"), + edge("c3", "m1", "default"), + edge("m1", "ai", "success"), + edge("ai", "sl", "default"), + edge("sl", "ap", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "pass", result.Status) + assert.Empty(t, result.Errors) + assert.Equal(t, 9, result.Summary.TotalNodes) + assert.Equal(t, 10, result.Summary.TotalEdges) +} + +func TestLintCanvas_EmptyCanvas(t *testing.T) { + result := LintCanvas(nil, nil, nil) + + assert.Equal(t, "pass", 
result.Status) + assert.Empty(t, result.Errors) + assert.Empty(t, result.Warnings) + assert.Empty(t, result.Info) + assert.Equal(t, 0, result.Summary.TotalNodes) + assert.Equal(t, 0, result.Summary.TotalEdges) +} + +func TestLintCanvas_OrphanNode(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Connected", "http", map[string]any{"url": "https://example.com"}), + componentNode("orphan", "Orphaned Node", "http", map[string]any{"url": "https://example.com"}), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 1, countIssuesByRule(result.Warnings, "orphan-node")) + found := false + for _, w := range result.Warnings { + if w.Rule == "orphan-node" { + assert.Equal(t, "Orphaned Node", w.NodeName) + found = true + } + } + assert.True(t, found) +} + +func TestLintCanvas_DeadEnd(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Dead End Node", "http", map[string]any{"url": "https://example.com"}), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + deadEnds := countIssuesByRule(result.Warnings, "dead-end") + assert.Equal(t, 1, deadEnds) + + for _, w := range result.Warnings { + if w.Rule == "dead-end" { + assert.Equal(t, "Dead End Node", w.NodeName) + } + } +} + +func TestLintCanvas_DeadEnd_TerminalOK(t *testing.T) { + // All terminal components should not produce dead-end warnings. 
+ terminals := []struct { + name string + component string + }{ + {"Approve", "approval"}, + {"Slack", "slack.sendTextMessage"}, + {"Create Issue", "github.createIssue"}, + {"Create PD", "pagerduty.createIncident"}, + {"Resolve PD", "pagerduty.resolveIncident"}, + } + + for _, tc := range terminals { + t.Run(tc.component, func(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("term", tc.name, tc.component, nil), + } + edges := []models.Edge{ + edge("t1", "term", "default"), + } + + result := LintCanvas(nodes, edges, nil) + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "dead-end")) + }) + } +} + +func TestLintCanvas_MissingApprovalGate(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("d1", "Resolve Incident", "pagerduty.resolveIncident", nil), + } + edges := []models.Edge{ + edge("t1", "d1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + require.Equal(t, 1, countIssuesByRule(result.Errors, "missing-approval-gate")) + + for _, e := range result.Errors { + if e.Rule == "missing-approval-gate" { + assert.Equal(t, "Resolve Incident", e.NodeName) + assert.Contains(t, e.Message, "pagerduty.resolveIncident") + } + } +} + +func TestLintCanvas_ApprovalGatePresent(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("ap", "Approve First", "approval", nil), + componentNode("d1", "Resolve Incident", "pagerduty.resolveIncident", nil), + } + edges := []models.Edge{ + edge("t1", "ap", "default"), + edge("ap", "d1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 0, countIssuesByRule(result.Errors, "missing-approval-gate")) +} + +func TestLintCanvas_MissingConfig_EmptyPrompt(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("ai", "AI Node", "claude.textPrompt", map[string]any{ + "prompt": "", + }), + } + edges := []models.Edge{ + 
edge("t1", "ai", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + configErrors := 0 + for _, e := range result.Errors { + if e.Rule == "missing-required-config" && e.NodeName == "AI Node" { + configErrors++ + assert.Contains(t, e.Message, "prompt") + } + } + assert.Equal(t, 1, configErrors) +} + +func TestLintCanvas_MissingConfig_MergeSingleInput(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("m1", "Solo Merge", "merge", nil), + } + edges := []models.Edge{ + edge("t1", "m1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + mergeInfo := 0 + for _, i := range result.Info { + if i.Rule == "missing-required-config" && i.NodeName == "Solo Merge" { + mergeInfo++ + assert.Contains(t, i.Message, "1 incoming edge") + } + } + assert.Equal(t, 1, mergeInfo) +} + +func TestLintCanvas_MissingConfig_FilterNoExpression(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("f1", "Empty Filter", "filter", map[string]any{ + "expression": "", + }), + } + edges := []models.Edge{ + edge("t1", "f1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + filterErrors := 0 + for _, e := range result.Errors { + if e.Rule == "missing-required-config" && e.NodeName == "Empty Filter" { + filterErrors++ + assert.Contains(t, e.Message, "expression") + } + } + assert.Equal(t, 1, filterErrors) +} + +func TestLintCanvas_InvalidExpression_UnbalancedBraces(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Bad Expr", "http", map[string]any{ + "url": "{{ no closing", + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + exprErrors := 0 + for _, e := range result.Errors { + if e.Rule == "invalid-expression" { + exprErrors++ + assert.Contains(t, e.Message, "unbalanced") + } + } + assert.Equal(t, 1, exprErrors) +} + +func TestLintCanvas_InvalidExpression_BadNodeRef(t *testing.T) 
{ + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Bad Ref", "http", map[string]any{ + "url": `{{ $["Nonexistent Node"].data }}`, + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + refWarnings := 0 + for _, w := range result.Warnings { + if w.Rule == "invalid-expression" { + refWarnings++ + assert.Contains(t, w.Message, "Nonexistent Node") + } + } + assert.Equal(t, 1, refWarnings) +} + +func TestLintCanvas_ValidExpression(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Listen for incidents"), + componentNode("c1", "Use Data", "http", map[string]any{ + "url": `{{ $["Listen for incidents"].data.field }}`, + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "invalid-expression")) + assert.Equal(t, 0, countIssuesByRule(result.Errors, "invalid-expression")) +} + +func TestLintCanvas_UnreachableBranch(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("f1", "Filter Without Default", "filter", map[string]any{ + "expression": "true", + }), + componentNode("c1", "On Match", "http", map[string]any{"url": "https://example.com"}), + } + edges := []models.Edge{ + edge("t1", "f1", "default"), + edge("f1", "c1", "match"), // not "default" + } + + result := LintCanvas(nodes, edges, nil) + + branchInfo := 0 + for _, i := range result.Info { + if i.Rule == "unreachable-branch" { + branchInfo++ + assert.Equal(t, "Filter Without Default", i.NodeName) + } + } + assert.Equal(t, 1, branchInfo) +} + +func TestLintCanvas_WidgetsIgnored(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Connected", "approval", nil), + widgetNode("w1", "My Annotation"), + widgetNode("w2", "Another Note"), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + // Widgets are not 
connected to anything — they should not produce warnings. + } + + result := LintCanvas(nodes, edges, nil) + + for _, w := range result.Warnings { + assert.NotEqual(t, "orphan-node", w.Rule, "widgets should not produce orphan-node warnings") + assert.NotEqual(t, "dead-end", w.Rule, "widgets should not produce dead-end warnings") + } +} + +// --------------------------------------------------------------------------- +// New tests for C1: Cycle detection +// --------------------------------------------------------------------------- + +func TestLintCanvas_CycleDetected(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("a", "Node A", "http", map[string]any{"url": "https://a.com"}), + componentNode("b", "Node B", "http", map[string]any{"url": "https://b.com"}), + componentNode("c", "Node C", "http", map[string]any{"url": "https://c.com"}), + } + edges := []models.Edge{ + edge("t1", "a", "default"), + edge("a", "b", "default"), + edge("b", "c", "default"), + edge("c", "a", "default"), // cycle: a -> b -> c -> a + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + assert.Equal(t, 1, countIssuesByRule(result.Errors, "cycle-detected")) +} + +func TestLintCanvas_NoCycle(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("a", "Node A", "approval", nil), + } + edges := []models.Edge{ + edge("t1", "a", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 0, countIssuesByRule(result.Errors, "cycle-detected")) +} + +// --------------------------------------------------------------------------- +// New tests for C6: Duplicate node detection +// --------------------------------------------------------------------------- + +func TestLintCanvas_DuplicateNodeID(t *testing.T) { + nodes := []models.Node{ + triggerNode("dup", "Trigger One"), + componentNode("dup", "Trigger Two", "approval", nil), + } + edges := []models.Edge{ + edge("dup", 
"dup", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + assert.GreaterOrEqual(t, countIssuesByRule(result.Errors, "duplicate-node-id"), 1) +} + +func TestLintCanvas_DuplicateNodeName(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Same Name"), + componentNode("c1", "Same Name", "approval", nil), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 1, countIssuesByRule(result.Warnings, "duplicate-node-name")) +} + +// --------------------------------------------------------------------------- +// New tests for C7: Edge validation +// --------------------------------------------------------------------------- + +func TestLintCanvas_DanglingEdge(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + } + edges := []models.Edge{ + edge("t1", "nonexistent", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + assert.GreaterOrEqual(t, countIssuesByRule(result.Errors, "invalid-edge"), 1) +} + +func TestLintCanvas_SelfLoop(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Self Looper", "http", map[string]any{"url": "https://example.com"}), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + edge("c1", "c1", "default"), // self-loop + } + + result := LintCanvas(nodes, edges, nil) + + assert.GreaterOrEqual(t, countIssuesByRule(result.Errors, "invalid-edge"), 1) + found := false + for _, e := range result.Errors { + if e.Rule == "invalid-edge" && e.NodeID == "c1" { + assert.Contains(t, e.Message, "self-loop") + found = true + } + } + assert.True(t, found) +} + +func TestLintCanvas_DuplicateEdge(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Target", "approval", nil), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + edge("t1", "c1", 
"default"), // duplicate + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 1, countIssuesByRule(result.Warnings, "duplicate-edge")) +} + +func TestLintCanvas_WidgetAsEdgeEndpoint(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + widgetNode("w1", "Annotation"), + } + edges := []models.Edge{ + edge("t1", "w1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.GreaterOrEqual(t, countIssuesByRule(result.Errors, "invalid-edge"), 1) +} + +// --------------------------------------------------------------------------- +// New tests for C9: Multiple destructive components +// --------------------------------------------------------------------------- + +func TestLintCanvas_MultipleDestructiveComponents_SingleApproval(t *testing.T) { + // One approval should cover all downstream destructive actions. + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("ap", "Approve", "approval", nil), + componentNode("d1", "Resolve", "pagerduty.resolveIncident", nil), + componentNode("d2", "Escalate", "pagerduty.escalateIncident", nil), + } + edges := []models.Edge{ + edge("t1", "ap", "default"), + edge("ap", "d1", "default"), + edge("d1", "d2", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 0, countIssuesByRule(result.Errors, "missing-approval-gate"), + "single upstream approval should satisfy both destructive nodes") +} + +func TestLintCanvas_MultipleDestructiveComponents_OneWithout(t *testing.T) { + // One destructive action has approval, the other does not. 
+ nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("ap", "Approve", "approval", nil), + componentNode("d1", "Resolve", "pagerduty.resolveIncident", nil), + componentNode("d2", "Delete Release", "github.deleteRelease", nil), + } + edges := []models.Edge{ + edge("t1", "ap", "default"), + edge("ap", "d1", "default"), + edge("t1", "d2", "default"), // d2 bypasses approval + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 1, countIssuesByRule(result.Errors, "missing-approval-gate")) + for _, e := range result.Errors { + if e.Rule == "missing-approval-gate" { + assert.Equal(t, "Delete Release", e.NodeName) + } + } +} + +// --------------------------------------------------------------------------- +// New test for C10: Nil Configuration map +// --------------------------------------------------------------------------- + +func TestLintCanvas_NilConfiguration(t *testing.T) { + // Nodes with nil Configuration should not panic. + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + { + ID: "nil-config", + Name: "Nil Config Claude", + Type: "TYPE_COMPONENT", + Ref: models.NodeRef{Component: &models.ComponentRef{Name: "claude.textPrompt"}}, + Configuration: nil, // explicitly nil + }, + } + edges := []models.Edge{ + edge("t1", "nil-config", "default"), + } + + // Should not panic. + result := LintCanvas(nodes, edges, nil) + + // Should report missing prompt config error. 
+ assert.Equal(t, "fail", result.Status) + configErrors := 0 + for _, e := range result.Errors { + if e.Rule == "missing-required-config" && e.NodeName == "Nil Config Claude" { + configErrors++ + } + } + assert.Equal(t, 1, configErrors) +} + +// --------------------------------------------------------------------------- +// New test for C11: Deeply nested configuration values +// --------------------------------------------------------------------------- + +func TestLintCanvas_NestedConfigExpression(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Listen for incidents"), + componentNode("c1", "HTTP with headers", "http", map[string]any{ + "url": "https://api.example.com", + "headers": map[string]any{ + "Authorization": `Bearer {{ $["Listen for incidents"].data.token }}`, + }, + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + // Should find the valid reference in nested config — no warnings. + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "invalid-expression")) +} + +func TestLintCanvas_NestedConfigBadRef(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "HTTP nested bad ref", "http", map[string]any{ + "url": "https://api.example.com", + "headers": map[string]any{ + "X-Custom": `{{ $["Ghost Node"].data.value }}`, + }, + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 1, countIssuesByRule(result.Warnings, "invalid-expression")) +} + +// --------------------------------------------------------------------------- +// New test for C3: Expression regex with quotes in node names +// --------------------------------------------------------------------------- + +func TestLintCanvas_ExpressionSingleQuoteRef(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "Node's Data", "http", map[string]any{"url": 
"https://example.com"}), + componentNode("c2", "Consumer", "http", map[string]any{ + // Double-quote reference to a node name containing a single quote + "url": `{{ $["Node's Data"].data.field }}`, + }), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + edge("c1", "c2", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + // Should correctly parse the double-quoted reference containing a single quote. + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "invalid-expression")) +} + +// --------------------------------------------------------------------------- +// YAML parsing types for the dogfood test +// --------------------------------------------------------------------------- + +type canvasYAML struct { + Spec struct { + Nodes []nodeYAML `json:"nodes"` + Edges []edgeYAML `json:"edges"` + } `json:"spec"` +} + +type nodeYAML struct { + ID string `json:"id"` + Name string `json:"name"` + Type string `json:"type"` + Configuration map[string]any `json:"configuration"` + Component *struct { + Name string `json:"name"` + } `json:"component"` + Trigger *struct { + Name string `json:"name"` + } `json:"trigger"` + Widget *struct { + Name string `json:"name"` + } `json:"widget"` + Blueprint *struct { + ID string `json:"id"` + } `json:"blueprint"` +} + +type edgeYAML struct { + SourceID string `json:"sourceId"` + TargetID string `json:"targetId"` + Channel string `json:"channel"` +} + +// C8 fix: dogfood test now asserts specific expected warnings and rejects unexpected ones. +func TestLintCanvas_IncidentCopilotTemplate(t *testing.T) { + data, err := os.ReadFile("../../templates/canvases/incident-copilot.yaml") + require.NoError(t, err, "failed to read incident-copilot.yaml template") + + var canvas canvasYAML + err = yaml.Unmarshal(data, &canvas) + require.NoError(t, err, "failed to parse incident-copilot.yaml") + + // Convert YAML nodes to models.Node. 
+ nodes := make([]models.Node, 0, len(canvas.Spec.Nodes)) + for _, yn := range canvas.Spec.Nodes { + n := models.Node{ + ID: yn.ID, + Name: yn.Name, + Type: yn.Type, + Configuration: yn.Configuration, + } + if n.Configuration == nil { + n.Configuration = map[string]any{} + } + if yn.Component != nil { + n.Ref.Component = &models.ComponentRef{Name: yn.Component.Name} + } + if yn.Trigger != nil { + n.Ref.Trigger = &models.TriggerRef{Name: yn.Trigger.Name} + } + if yn.Widget != nil { + n.Ref.Widget = &models.WidgetRef{Name: yn.Widget.Name} + } + if yn.Blueprint != nil { + n.Ref.Blueprint = &models.BlueprintRef{ID: yn.Blueprint.ID} + } + nodes = append(nodes, n) + } + + // Convert YAML edges to models.Edge. + edges := make([]models.Edge, 0, len(canvas.Spec.Edges)) + for _, ye := range canvas.Spec.Edges { + edges = append(edges, models.Edge{ + SourceID: ye.SourceID, + TargetID: ye.TargetID, + Channel: ye.Channel, + }) + } + + result := LintCanvas(nodes, edges, nil) + + // The incident-copilot template should pass the linter with zero errors. + assert.Equal(t, "pass", result.Status, "incident-copilot template should pass lint") + assert.Empty(t, result.Errors, "incident-copilot template should have zero errors") + + // Verify we actually parsed a non-trivial canvas. + assert.Greater(t, result.Summary.TotalNodes, 5, "should have parsed multiple nodes") + assert.Greater(t, result.Summary.TotalEdges, 5, "should have parsed multiple edges") + + // With channel configured, there should be no warnings. + assert.Empty(t, result.Warnings, "copilot template should have zero warnings with channel configured") + + // Assert no orphan nodes, no dead ends, no cycles. 
+ assert.Equal(t, 0, countIssuesByRule(result.Warnings, "orphan-node"), "no orphan nodes expected") + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "dead-end"), "no dead ends expected") + assert.Equal(t, 0, countIssuesByRule(result.Errors, "cycle-detected"), "no cycles expected") + + // Assert info section is reasonable. + for _, info := range result.Info { + t.Logf("INFO: [%s] %s: %s", info.Rule, info.NodeName, info.Message) + } + + // Quality score assertions. + assert.GreaterOrEqual(t, result.QualityScore, 90, "copilot template should score >= 90") + assert.Equal(t, GradeA, result.QualityGrade, "copilot template should be grade A") + t.Logf("Quality: score=%d grade=%s", result.QualityScore, result.QualityGrade) +} + +// --------------------------------------------------------------------------- +// Quality scoring tests +// --------------------------------------------------------------------------- + +func TestQualityScore_Perfect(t *testing.T) { + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("c1", "End", "approval", nil), + } + edges := []models.Edge{ + edge("t1", "c1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, 100, result.QualityScore) + assert.Equal(t, GradeA, result.QualityGrade) +} + +func TestQualityScore_WithErrors(t *testing.T) { + // Destructive component without approval = 1 error. + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("d1", "Resolve", "pagerduty.resolveIncident", nil), + } + edges := []models.Edge{ + edge("t1", "d1", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + // 1 error = -15 points -> score 85, grade B + assert.Equal(t, 85, result.QualityScore) + assert.Equal(t, GradeB, result.QualityGrade) +} + +func TestQualityScore_ManyIssues(t *testing.T) { + // 4 errors (missing-approval-gate) + 1 warning (dead-end on github.deleteRelease, + // which is not in terminalComponents). 
+ // Error penalty: 4*15=60, capped at 60. Warning penalty: 1*5=5. Total: 65. + // Score: 100-65=35, grade F. + nodes := []models.Node{ + triggerNode("t1", "Trigger"), + componentNode("d1", "Resolve", "pagerduty.resolveIncident", nil), + componentNode("d2", "Escalate", "pagerduty.escalateIncident", nil), + componentNode("d3", "Delete", "github.deleteRelease", nil), + componentNode("d4", "Release", "github.createRelease", nil), + } + edges := []models.Edge{ + edge("t1", "d1", "default"), + edge("t1", "d2", "default"), + edge("t1", "d3", "default"), + edge("t1", "d4", "default"), + } + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "fail", result.Status) + assert.LessOrEqual(t, result.QualityScore, 40, "many issues should produce low score") + assert.GreaterOrEqual(t, result.Summary.ErrorCount, 4) +} + +// --------------------------------------------------------------------------- +// Dogfood tests for existing templates +// --------------------------------------------------------------------------- + +func loadTemplateForTest(t *testing.T, path string) ([]models.Node, []models.Edge) { + t.Helper() + data, err := os.ReadFile(path) + require.NoError(t, err, "failed to read template: %s", path) + + var canvas canvasYAML + err = yaml.Unmarshal(data, &canvas) + require.NoError(t, err, "failed to parse template: %s", path) + + nodes := make([]models.Node, 0, len(canvas.Spec.Nodes)) + for _, yn := range canvas.Spec.Nodes { + n := models.Node{ + ID: yn.ID, + Name: yn.Name, + Type: yn.Type, + Configuration: yn.Configuration, + } + if n.Configuration == nil { + n.Configuration = map[string]any{} + } + if yn.Component != nil { + n.Ref.Component = &models.ComponentRef{Name: yn.Component.Name} + } + if yn.Trigger != nil { + n.Ref.Trigger = &models.TriggerRef{Name: yn.Trigger.Name} + } + if yn.Widget != nil { + n.Ref.Widget = &models.WidgetRef{Name: yn.Widget.Name} + } + if yn.Blueprint != nil { + n.Ref.Blueprint = &models.BlueprintRef{ID: yn.Blueprint.ID} + } + 
nodes = append(nodes, n) + } + + edges := make([]models.Edge, 0, len(canvas.Spec.Edges)) + for _, ye := range canvas.Spec.Edges { + edges = append(edges, models.Edge{ + SourceID: ye.SourceID, + TargetID: ye.TargetID, + Channel: ye.Channel, + }) + } + + return nodes, edges +} + +func TestLintCanvas_IncidentDataCollectionTemplate(t *testing.T) { + nodes, edges := loadTemplateForTest(t, "../../templates/canvases/incident-data-collection.yaml") + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "pass", result.Status, "incident-data-collection template should pass lint") + assert.Empty(t, result.Errors, "incident-data-collection template should have zero errors") + assert.Greater(t, result.Summary.TotalNodes, 3, "should have parsed multiple nodes") + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "orphan-node")) + assert.Equal(t, 0, countIssuesByRule(result.Errors, "cycle-detected")) + assert.Equal(t, GradeA, result.QualityGrade, "incident-data-collection should be grade A") + + t.Logf("Quality: score=%d grade=%s errors=%d warnings=%d", + result.QualityScore, result.QualityGrade, result.Summary.ErrorCount, result.Summary.WarningCount) +} + +func TestLintCanvas_IncidentRouterTemplate(t *testing.T) { + nodes, edges := loadTemplateForTest(t, "../../templates/canvases/incident-router.yaml") + + result := LintCanvas(nodes, edges, nil) + + assert.Equal(t, "pass", result.Status, "incident-router template should pass lint") + assert.Empty(t, result.Errors, "incident-router template should have zero errors") + assert.Greater(t, result.Summary.TotalNodes, 3, "should have parsed multiple nodes") + assert.Equal(t, 0, countIssuesByRule(result.Warnings, "orphan-node")) + assert.Equal(t, 0, countIssuesByRule(result.Errors, "cycle-detected")) + assert.Equal(t, GradeA, result.QualityGrade, "incident-router should be grade A") + + t.Logf("Quality: score=%d grade=%s errors=%d warnings=%d", + result.QualityScore, result.QualityGrade, result.Summary.ErrorCount, 
result.Summary.WarningCount) +} diff --git a/pkg/public/server.go b/pkg/public/server.go index 014362143e..f8a11de058 100644 --- a/pkg/public/server.go +++ b/pkg/public/server.go @@ -24,6 +24,7 @@ import ( "github.com/superplanehq/superplane/pkg/core" "github.com/superplanehq/superplane/pkg/database" "github.com/superplanehq/superplane/pkg/grpc" + "github.com/superplanehq/superplane/pkg/grpc/actions/canvases" "github.com/superplanehq/superplane/pkg/grpc/actions/messages" "github.com/superplanehq/superplane/pkg/jwt" "github.com/superplanehq/superplane/pkg/logging" @@ -259,6 +260,22 @@ func (s *Server) RegisterGRPCGateway(grpcServerAddr string) error { w.WriteHeader(http.StatusOK) }).Methods("GET") + // Canvas lint endpoint — quality gate validation. + // Registered before the gRPC gateway catch-all so it takes priority. + lintHandler := canvases.LintCanvasHandler(s.registry) + orgLintMiddleware := middleware.OrganizationAuthMiddleware(s.jwt) + s.Router.HandleFunc("/api/v1/canvases/{canvasId}/lint", func(w http.ResponseWriter, r *http.Request) { + orgLintMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + user, ok := middleware.GetUserFromContext(r.Context()) + if !ok { + http.Error(w, "User not found in context", http.StatusUnauthorized) + return + } + r.Header.Set("X-Organization-Id", user.OrganizationID.String()) + lintHandler(w, r) + })).ServeHTTP(w, r) + }).Methods("POST") + // Protect the gRPC gateway routes with organization authentication orgAuthMiddleware := middleware.OrganizationAuthMiddleware(s.jwt) protectedGRPCHandler := orgAuthMiddleware(s.grpcGatewayHandler(grpcGatewayMux)) diff --git a/templates/canvases/incident-copilot-demo.yaml b/templates/canvases/incident-copilot-demo.yaml new file mode 100644 index 0000000000..9be3f0df27 --- /dev/null +++ b/templates/canvases/incident-copilot-demo.yaml @@ -0,0 +1,167 @@ +metadata: + name: "Incident Copilot Demo" + description: "Simplified demo: Manual trigger with mock incident data → 
Claude AI triage → Slack evidence pack. Click Run to fire." + isTemplate: false +spec: + nodes: + - id: "start-start-dm01" + name: "Run with mock incident" + type: "TYPE_TRIGGER" + configuration: + templates: + - name: "P1 API Gateway Incident" + payload: + incident: + id: "PGR0VU2" + title: "API Gateway: 5xx error rate spike to 15% on /api/v1/orders" + status: "triggered" + urgency: "high" + html_url: "https://acme.pagerduty.com/incidents/PGR0VU2" + created_at: "2026-03-28T14:30:00Z" + priority: + summary: "P1" + service: + summary: "API Gateway (Production)" + assignees: + - summary: "Dragan Petrovic (On-Call SRE)" + body: + details: "5xx error rate spiked from 0.1% to 15.3% at 14:28 UTC. Affects /api/v1/orders endpoint. 1,247 users impacted. Correlated with deployment deploy-api-v2.14.3 at 14:25 UTC." + recent_deploy: + tag_name: "v2.14.3" + name: "Release v2.14.3 - Order Service Refactor" + body: "Refactored order validation logic. Migrated to new payment gateway client. Updated database connection pooling from 20 to 50." + published_at: "2026-03-28T14:25:30Z" + metrics: + error_rate_5xx: "15.3%" + latency_p99: "4500ms" + request_count_drop: "from 15234 to 6721" + metadata: null + position: + x: 200 + "y": 400 + component: null + blueprint: null + trigger: + name: "start" + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-ai01" + name: "AI Triage Assessment" + type: "TYPE_COMPONENT" + configuration: + model: "claude-sonnet-4-20250514" + maxTokens: 4096 + temperature: 0.3 + systemMessage: "You are an expert SRE incident triage assistant. Produce a structured triage report with: 1) SEVERITY ASSESSMENT (P1-P4 with justification), 2) LIKELY ROOT CAUSE (top 3 hypotheses ranked by probability), 3) AFFECTED SYSTEMS, 4) RECOMMENDED ACTIONS (ordered by priority), 5) ESCALATION RECOMMENDATION. Be concise. Use bullet points." 
+ prompt: "Analyze the following production incident data and provide a structured triage report.\n\nFull incident context:\n{{ toJSON(root()) }}" + metadata: null + position: + x: 700 + "y": 400 + component: + name: "claude.textPrompt" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-sl01" + name: "Send to Slack" + type: "TYPE_COMPONENT" + configuration: + channel: "C0APV7H889F" + text: ":rotating_light: *INCIDENT TRIAGE — AUTO-GENERATED*\n\n{{ $[\"AI Triage Assessment\"].data.text }}\n\n---\n:clock1: _Triage generated by SuperPlane Incident Copilot_" + metadata: null + position: + x: 1200 + "y": 400 + component: + name: "slack.sendTextMessage" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation-dm1a" + name: "annotation" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 200 + text: "### 1. Click Run\n\nThe Manual Run trigger has a pre-loaded P1 incident payload with mock PagerDuty, GitHub deploy, and Datadog metrics data.\n\nClick the **Run** button to fire the workflow." + width: 400 + metadata: null + position: + x: 150 + "y": 130 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation-dm2b" + name: "annotation2" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 200 + text: "### 2. AI analyzes the incident\n\nClaude receives all incident context and produces a structured severity assessment with root cause hypotheses and recommended actions." 
+ width: 400 + metadata: null + position: + x: 650 + "y": 130 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation-dm3c" + name: "annotation3" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 200 + text: "### 3. Evidence pack to Slack\n\nThe AI triage report is posted to #hackathon-demo as a structured evidence pack with severity, root cause, and recommended actions." + width: 400 + metadata: null + position: + x: 1150 + "y": 130 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + edges: + - sourceId: "start-start-dm01" + targetId: "component-node-ai01" + channel: "default" + - sourceId: "component-node-ai01" + targetId: "component-node-sl01" + channel: "default" diff --git a/templates/canvases/incident-copilot.yaml b/templates/canvases/incident-copilot.yaml new file mode 100644 index 0000000000..8e7ef68790 --- /dev/null +++ b/templates/canvases/incident-copilot.yaml @@ -0,0 +1,321 @@ +metadata: + name: "Incident Copilot" + description: "AI-powered incident triage: auto-collects context from PagerDuty, GitHub, and Datadog, then uses Claude to generate a structured severity assessment and evidence pack." 
+ isTemplate: false +spec: + nodes: + - id: "pagerduty-onincident-pagerduty-onincident-hk8x3p" + name: "Listen for incidents" + type: "TYPE_TRIGGER" + configuration: + events: + - "incident.triggered" + service: "PQEAM2I" + urgencies: + - "high" + metadata: + service: + html_url: "https://superplane-test.eu.pagerduty.com/service-directory/PQEAM2I" + id: "PQEAM2I" + name: "Default Service" + position: + x: 200 + "y": 500 + component: null + blueprint: null + trigger: + name: "pagerduty.onIncident" + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-f1lt3r" + name: "Is it P1 or P2" + type: "TYPE_COMPONENT" + configuration: + expression: "$[\"Listen for incidents\"].data.incident.priority.summary == \"P1\" || $[\"Listen for incidents\"].data.incident.priority.summary == \"P2\"" + metadata: null + position: + x: 700 + "y": 500 + component: + name: "filter" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-gh8r3l" + name: "Get latest deploy" + type: "TYPE_COMPONENT" + configuration: + repository: "api-service" + metadata: + repository: + id: 1046188046 + name: "api-service" + url: "https://github.com/acme-corp/api-service" + position: + x: 1200 + "y": 350 + component: + name: "github.getRelease" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-dd8m3t" + name: "Fetch Datadog metrics" + type: "TYPE_COMPONENT" + configuration: + method: "GET" + url: "https://api.datadoghq.com/api/v1/query?query=avg:system.cpu.user{service:api-gateway}&from=-3600" + headers: + DD-API-KEY: "{{ $secret.DATADOG_API_KEY }}" + metadata: null + position: + x: 1200 + "y": 500 + component: + name: "http" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + 
warningMessage: "" + + - id: "component-node-pd8l0g" + name: "Get incident timeline" + type: "TYPE_COMPONENT" + configuration: + incidentId: "{{ $[\"Listen for incidents\"].data.incident.id }}" + metadata: null + position: + x: 1200 + "y": 650 + component: + name: "pagerduty.listLogEntries" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "merge-merge-m3rg3x" + name: "Wait for all context" + type: "TYPE_COMPONENT" + configuration: + enableStopIf: false + enableTimeout: true + executionTimeout: + unit: "minutes" + value: 2 + metadata: null + position: + x: 1700 + "y": 500 + component: + name: "merge" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-cl8ai9" + name: "AI Triage Assessment" + type: "TYPE_COMPONENT" + configuration: + model: "claude-sonnet-4-20250514" + maxTokens: 4096 + temperature: 0.3 + systemMessage: "You are an expert SRE incident triage assistant. Given incident details, recent deployments, metrics, and logs, produce a structured triage report with:\n\n1. SEVERITY ASSESSMENT (P1-P4 with justification)\n2. LIKELY ROOT CAUSE (top 3 hypotheses ranked by probability)\n3. AFFECTED SYSTEMS (services, endpoints, user segments)\n4. RECOMMENDED ACTIONS (ordered by priority, with estimated impact)\n5. ESCALATION RECOMMENDATION (who to page, what team)\n\nBe concise. Use bullet points. Include specific evidence for each claim." 
+ prompt: "INCIDENT:\nTitle: {{ $[\"Listen for incidents\"].data.incident.title }}\nStatus: {{ $[\"Listen for incidents\"].data.incident.status }}\nUrgency: {{ $[\"Listen for incidents\"].data.incident.urgency }}\nPriority: {{ $[\"Listen for incidents\"].data.incident.priority.summary }}\nService: {{ $[\"Listen for incidents\"].data.incident.service.summary }}\n\nRECENT DEPLOYMENT:\n{{ $[\"Get latest deploy\"].data }}\n\nMETRICS:\n{{ $[\"Fetch Datadog metrics\"].data }}\n\nINCIDENT LOG:\n{{ $[\"Get incident timeline\"].data }}" + metadata: null + position: + x: 2200 + "y": 500 + component: + name: "claude.textPrompt" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-sl8ck1" + name: "Send evidence pack to Slack" + type: "TYPE_COMPONENT" + configuration: + channel: "C0APV7H889F" + text: ":rotating_light: *INCIDENT TRIAGE — AUTO-GENERATED*\n\n*{{ $[\"Listen for incidents\"].data.incident.title }}*\nPriority: {{ $[\"Listen for incidents\"].data.incident.priority.summary }}\nService: {{ $[\"Listen for incidents\"].data.incident.service.summary }}\n\n---\n\n{{ $[\"AI Triage Assessment\"].data.text }}\n\n---\n\n_Triage generated by SuperPlane Incident Copilot in < 60 seconds_" + metadata: null + position: + x: 2700 + "y": 500 + component: + name: "slack.sendTextMessage" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "component-node-apr0v1" + name: "Approve remediation" + type: "TYPE_COMPONENT" + configuration: + items: + - type: "anyone" + metadata: null + position: + x: 3200 + "y": 500 + component: + name: "approval" + blueprint: null + trigger: null + widget: null + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation-hk1a1a" + name: "annotation" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 250 
+ text: "### 1. Listen for production incidents\n\nThe PagerDuty trigger listens for new incidents. A filter ensures only P1 and P2 incidents proceed.\n\n___\nTo use this template:\n- Connect your PagerDuty account\n- Configure the service to monitor\n- Set urgency filters as needed" + width: 460 + metadata: null + position: + x: 150 + "y": 180 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation2-hk2b2b" + name: "annotation2" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 250 + text: "### 2. Collect context in parallel\n\nThree parallel branches fetch:\n- Latest deploy from GitHub\n- System metrics from Datadog\n- Incident timeline from PagerDuty\n\nA Merge component waits for all three to complete.\n\n___\nConfigure each data source with your credentials and endpoints." + width: 600 + metadata: null + position: + x: 1150 + "y": 100 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation3-hk3c3c" + name: "annotation3" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 220 + text: "### 3. AI-powered triage\n\nClaude receives all collected context and produces a structured severity assessment with root cause hypotheses and recommended actions.\n\n___\n- Review the system prompt to customize triage format\n- Adjust temperature for more/less creative analysis" + width: 500 + metadata: null + position: + x: 2150 + "y": 200 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + - id: "annotation-annotation4-hk4d4d" + name: "annotation4" + type: "TYPE_WIDGET" + configuration: + color: "yellow" + height: 220 + text: "### 4. 
Notify and approve\n\nThe AI triage report is posted to Slack as an evidence pack. An approval gate prevents any automated remediation actions until a human reviews and approves.\n\n___\n- Set the Slack channel for notifications\n- Configure approval requirements" + width: 560 + metadata: null + position: + x: 2650 + "y": 200 + component: null + blueprint: null + trigger: null + widget: + name: "annotation" + isCollapsed: false + integration: null + errorMessage: "" + warningMessage: "" + + edges: + - sourceId: "pagerduty-onincident-pagerduty-onincident-hk8x3p" + targetId: "component-node-f1lt3r" + channel: "default" + - sourceId: "component-node-f1lt3r" + targetId: "component-node-gh8r3l" + channel: "default" + - sourceId: "component-node-f1lt3r" + targetId: "component-node-dd8m3t" + channel: "default" + - sourceId: "component-node-f1lt3r" + targetId: "component-node-pd8l0g" + channel: "default" + - sourceId: "component-node-gh8r3l" + targetId: "merge-merge-m3rg3x" + channel: "default" + - sourceId: "component-node-dd8m3t" + targetId: "merge-merge-m3rg3x" + channel: "default" + - sourceId: "component-node-pd8l0g" + targetId: "merge-merge-m3rg3x" + channel: "default" + - sourceId: "merge-merge-m3rg3x" + targetId: "component-node-cl8ai9" + channel: "success" + - sourceId: "component-node-cl8ai9" + targetId: "component-node-sl8ck1" + channel: "default" + - sourceId: "component-node-sl8ck1" + targetId: "component-node-apr0v1" + channel: "default" diff --git a/web_src/src/ui/CanvasPage/Header.tsx b/web_src/src/ui/CanvasPage/Header.tsx index a3fe909fe8..242c3f3f8a 100644 --- a/web_src/src/ui/CanvasPage/Header.tsx +++ b/web_src/src/ui/CanvasPage/Header.tsx @@ -2,6 +2,7 @@ import { OrganizationMenuButton } from "@/components/OrganizationMenuButton"; import { PermissionTooltip } from "@/components/PermissionGate"; import { usePermissions } from "@/contexts/PermissionsContext"; import { + CheckCircle2, CloudAlert, CloudCheck, Copy, @@ -11,14 +12,18 @@ import { Plus, 
RefreshCw, RotateCcw, + ShieldAlert, + TriangleAlert, Undo2, Pencil, } from "lucide-react"; +import type { ComponentsNode } from "@/api-client"; +import { lintCanvas, type LintResult, type LintEdge } from "@/utils/canvasLinter"; import { Button } from "../button"; import { Button as UIButton } from "@/components/ui/button"; import { useCanvases } from "@/hooks/useCanvasData"; import { Link, useParams } from "react-router-dom"; -import { useEffect, useRef, useState, type ReactNode } from "react"; +import { useEffect, useMemo, useRef, useState, type ReactNode } from "react"; import { cn } from "@/lib/utils"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { DropdownMenu, DropdownMenuContent, DropdownMenuTrigger } from "@/ui/dropdownMenu"; @@ -77,6 +82,10 @@ interface HeaderProps { lastSavedAt?: Date | string | null; /** Shown in tooltip when saveState is error (last failed save message). */ saveErrorMessage?: string | null; + /** Canvas spec nodes for linter badge. */ + workflowNodes?: ComponentsNode[]; + /** Canvas spec edges for linter badge. */ + workflowEdges?: LintEdge[]; } function formatLastSavedTooltip(at: Date | string | null | undefined): string { @@ -246,8 +255,17 @@ export function Header({ enterEditModeDisabled, enterEditModeDisabledTooltip, unpublishedDraftChangeCount = 0, + workflowNodes, + workflowEdges, }: HeaderProps) { const { workflowId } = useParams<{ workflowId?: string }>(); + + // Canvas linter — runs on every node/edge change. + // Shows the badge whenever the canvas view is active, even on empty canvases. 
+ const lintResult = useMemo(() => { + if (workflowNodes === undefined && workflowEdges === undefined) return null; + return lintCanvas(workflowNodes || [], workflowEdges || []); + }, [workflowNodes, workflowEdges]); const { data: workflows = [], isLoading: workflowsLoading } = useCanvases(organizationId || ""); const { canAct, isLoading: permissionsLoading } = usePermissions(); const canCreateCanvas = permissionsLoading || canAct("canvases", "create"); @@ -475,6 +493,50 @@ export function Header({
+ {lintResult && (topViewMode === "canvas" || topViewMode === undefined) ? (() => { + const hasErrors = lintResult.errorCount > 0; + const hasWarnings = lintResult.warningCount > 0; + const badgeColor = hasErrors + ? "bg-red-100 text-red-800" + : hasWarnings + ? "bg-yellow-100 text-yellow-800" + : "bg-green-100 text-green-800"; + const BadgeIcon = hasErrors ? ShieldAlert : hasWarnings ? TriangleAlert : CheckCircle2; + const badgeLabel = hasErrors + ? `${lintResult.errorCount} error${lintResult.errorCount !== 1 ? "s" : ""}` + : hasWarnings + ? `${lintResult.warningCount} warning${lintResult.warningCount !== 1 ? "s" : ""}` + : "Lint OK"; + + return ( + + + + + {badgeLabel} + + + +

+ Quality Gate: {lintResult.qualityGrade} ({lintResult.qualityScore}/100) +

+ {lintResult.errors.map((e, i) => ( +

+ {e.message} +

+ ))} + {lintResult.warnings.map((w, i) => ( +

+ {w.message} +

+ ))} + {!hasErrors && !hasWarnings ? ( +

No issues found

+ ) : null} +
+
+ ); + })() : null} {isDefaultMode ? ( <> {isVersioningDisabledMode && onExportYamlCopy && onExportYamlDownload ? ( diff --git a/web_src/src/ui/CanvasPage/index.tsx b/web_src/src/ui/CanvasPage/index.tsx index 14f3fd29d4..1fee993fb9 100644 --- a/web_src/src/ui/CanvasPage/index.tsx +++ b/web_src/src/ui/CanvasPage/index.tsx @@ -1099,6 +1099,7 @@ function CanvasPage(props: CanvasPageProps) { memoryItemCount={props.memoryItemCount} onExportYamlCopy={props.onExportYamlCopy} onExportYamlDownload={props.onExportYamlDownload} + workflowNodes={props.workflowNodes} /> {props.headerBanner ?
{props.headerBanner}
: null}
@@ -1733,6 +1734,7 @@ function CanvasContentHeader({ memoryItemCount, onExportYamlCopy, onExportYamlDownload, + workflowNodes, }: { state: CanvasPageState; onSave?: (nodes: CanvasNode[]) => void; @@ -1767,6 +1769,7 @@ function CanvasContentHeader({ memoryItemCount?: number; onExportYamlCopy?: (nodes: CanvasNode[]) => void; onExportYamlDownload?: (nodes: CanvasNode[]) => void; + workflowNodes?: ComponentsNode[]; }) { const stateRef = useRef(state); stateRef.current = state; @@ -1831,6 +1834,8 @@ function CanvasContentHeader({ memoryItemCount={memoryItemCount} onExportYamlCopy={onExportYamlCopy ? handleExportYamlCopy : undefined} onExportYamlDownload={onExportYamlDownload ? handleExportYamlDownload : undefined} + workflowNodes={workflowNodes} + workflowEdges={stateRef.current.edges} /> ); } @@ -2794,6 +2799,8 @@ function CanvasContent({ enterEditModeDisabled={enterEditModeDisabled} enterEditModeDisabledTooltip={enterEditModeDisabledTooltip} unpublishedDraftChangeCount={unpublishedDraftChangeCount} + workflowNodes={workflowNodes} + workflowEdges={styledEdges} /> )} diff --git a/web_src/src/utils/canvasLinter.ts b/web_src/src/utils/canvasLinter.ts new file mode 100644 index 0000000000..138d5ea8c0 --- /dev/null +++ b/web_src/src/utils/canvasLinter.ts @@ -0,0 +1,490 @@ +import type { ComponentsNode } from "@/api-client"; + +export type LintSeverity = "error" | "warning" | "info"; +export type QualityGrade = "A" | "B" | "C" | "D" | "F"; + +export interface LintIssue { + severity: LintSeverity; + rule: string; + nodeId: string; + nodeName: string; + message: string; +} + +export interface LintResult { + status: "pass" | "fail"; + errors: LintIssue[]; + warnings: LintIssue[]; + info: LintIssue[]; + errorCount: number; + warningCount: number; + infoCount: number; + qualityScore: number; + qualityGrade: QualityGrade; +} + +/** Accepts either ComponentsEdge (from API spec) or React Flow Edge shape. 
*/ +export interface LintEdge { + sourceId?: string; + targetId?: string; + source?: string; + target?: string; + channel?: string; +} + +function edgeSourceId(e: LintEdge): string | undefined { + return e.sourceId || e.source; +} +function edgeTargetId(e: LintEdge): string | undefined { + return e.targetId || e.target; +} + +const TERMINAL_COMPONENTS = new Set([ + "approval", + "slack.sendTextMessage", + "slack.waitForButtonClick", + "github.createIssue", + "github.createIssueComment", + "github.createRelease", + "github.updateIssue", + "github.publishCommitStatus", + "github.addReaction", + "pagerduty.createIncident", + "pagerduty.resolveIncident", + "pagerduty.escalateIncident", + "pagerduty.annotateIncident", + "pagerduty.acknowledgeIncident", +]); + +const DESTRUCTIVE_COMPONENTS = new Set([ + "pagerduty.resolveIncident", + "pagerduty.escalateIncident", + "github.deleteRelease", + "github.createRelease", +]); + +const NODE_REF_DOUBLE = /\$\["([^"]+)"\]/g; +const NODE_REF_SINGLE = /\$\['([^']+)'\]/g; + +function getComponentName(node: ComponentsNode): string { + return node.component?.name || node.trigger?.name || ""; +} + +function computeQualityScore( + errors: number, + warnings: number, + infos: number, +): { score: number; grade: QualityGrade } { + const ep = Math.min(errors * 15, 60); + const wp = Math.min(warnings * 5, 30); + const ip = Math.min(infos * 1, 10); + const score = Math.max(0, 100 - ep - wp - ip); + + let grade: QualityGrade; + if (score >= 90) grade = "A"; + else if (score >= 75) grade = "B"; + else if (score >= 60) grade = "C"; + else if (score >= 40) grade = "D"; + else grade = "F"; + + return { score, grade }; +} + +/** Recursively collect all string values from a config object. 
*/ +function collectStrings(obj: unknown): string[] { + if (typeof obj === "string") return [obj]; + if (Array.isArray(obj)) return obj.flatMap(collectStrings); + if (obj && typeof obj === "object") { + return Object.values(obj).flatMap(collectStrings); + } + return []; +} + +export function lintCanvas( + nodes: ComponentsNode[] | undefined, + edges: LintEdge[] | undefined, +): LintResult { + const result: LintResult = { + status: "pass", + errors: [], + warnings: [], + info: [], + errorCount: 0, + warningCount: 0, + infoCount: 0, + qualityScore: 100, + qualityGrade: "A", + }; + + if (!nodes?.length) return result; + + const safeEdges = edges || []; + const nodeById = new Map(nodes.map((n) => [n.id, n])); + const nodeNames = new Set(nodes.map((n) => n.name)); + const widgets = new Set(nodes.filter((n) => n.type === "TYPE_WIDGET").map((n) => n.id)); + const triggers = nodes.filter((n) => n.type === "TYPE_TRIGGER"); + + // Build adjacency. + const outgoing = new Map(); + const incoming = new Map(); + for (const e of safeEdges) { + const src = edgeSourceId(e); + const tgt = edgeTargetId(e); + if (src) { + const list = outgoing.get(src) || []; + list.push(e); + outgoing.set(src, list); + } + if (tgt) { + const list = incoming.get(tgt) || []; + list.push(e); + incoming.set(tgt, list); + } + } + + // ---- Rule: Duplicate node IDs ---- + const seenIds = new Set(); + for (const n of nodes) { + if (n.id && seenIds.has(n.id)) { + result.errors.push({ + severity: "error", + rule: "duplicate-node-id", + nodeId: n.id, + nodeName: n.name || "", + message: `Duplicate node ID "${n.id}"`, + }); + } + if (n.id) seenIds.add(n.id); + } + + // ---- Rule: Duplicate node names (non-widgets) ---- + const seenNames = new Set(); + for (const n of nodes) { + if (widgets.has(n.id!)) continue; + if (n.name && seenNames.has(n.name)) { + result.warnings.push({ + severity: "warning", + rule: "duplicate-node-name", + nodeId: n.id || "", + nodeName: n.name, + message: `Duplicate node name 
"${n.name}" — expression references may be ambiguous`, + }); + } + if (n.name) seenNames.add(n.name); + } + + // ---- Rule: Invalid edges ---- + const seenEdgeKeys = new Set(); + for (let i = 0; i < safeEdges.length; i++) { + const e = safeEdges[i]; + const src = edgeSourceId(e); + const tgt = edgeTargetId(e); + + if (src && !nodeById.has(src)) { + result.errors.push({ + severity: "error", + rule: "invalid-edge", + nodeId: src, + nodeName: "", + message: `Edge ${i} references nonexistent source node "${src}"`, + }); + } + if (tgt && !nodeById.has(tgt)) { + result.errors.push({ + severity: "error", + rule: "invalid-edge", + nodeId: tgt || "", + nodeName: "", + message: `Edge ${i} references nonexistent target node "${tgt}"`, + }); + } + if (src && tgt && src === tgt) { + result.errors.push({ + severity: "error", + rule: "invalid-edge", + nodeId: src, + nodeName: nodeById.get(src)?.name || "", + message: `Edge ${i} is a self-loop on node "${src}"`, + }); + } + if (src && tgt) { + const key = `${src}|${tgt}|${e.channel || "default"}`; + if (seenEdgeKeys.has(key)) { + result.warnings.push({ + severity: "warning", + rule: "duplicate-edge", + nodeId: src, + nodeName: nodeById.get(src)?.name || "", + message: `Duplicate edge from "${src}" to "${tgt}" on channel "${e.channel || "default"}"`, + }); + } + seenEdgeKeys.add(key); + } + if (src && widgets.has(src)) { + result.errors.push({ + severity: "error", + rule: "invalid-edge", + nodeId: src, + nodeName: nodeById.get(src)?.name || "", + message: `Edge ${i} uses widget node "${src}" as source`, + }); + } + if (tgt && widgets.has(tgt)) { + result.errors.push({ + severity: "error", + rule: "invalid-edge", + nodeId: tgt, + nodeName: nodeById.get(tgt)?.name || "", + message: `Edge ${i} uses widget node "${tgt}" as target`, + }); + } + } + + // ---- Rule: Cycle detection (Kahn's) ---- + const inDegree = new Map(); + const adj = new Map(); + for (const n of nodes) { + if (widgets.has(n.id!)) continue; + inDegree.set(n.id!, 0); + 
} + for (const e of safeEdges) { + const src = edgeSourceId(e); + const tgt = edgeTargetId(e); + if (!src || !tgt) continue; + if (widgets.has(src) || widgets.has(tgt)) continue; + adj.set(src, [...(adj.get(src) || []), tgt]); + inDegree.set(tgt, (inDegree.get(tgt) || 0) + 1); + } + const kahnQueue: string[] = []; + for (const [id, deg] of inDegree) { + if (deg === 0) kahnQueue.push(id); + } + let kahnVisited = 0; + while (kahnQueue.length > 0) { + const cur = kahnQueue.shift()!; + kahnVisited++; + for (const next of adj.get(cur) || []) { + const d = (inDegree.get(next) || 1) - 1; + inDegree.set(next, d); + if (d === 0) kahnQueue.push(next); + } + } + const totalNonWidget = nodes.filter((n) => !widgets.has(n.id!)).length; + if (kahnVisited < totalNonWidget) { + result.errors.push({ + severity: "error", + rule: "cycle-detected", + nodeId: "", + nodeName: "", + message: "Cycle detected in canvas graph", + }); + } + + // ---- Rule: Orphan nodes ---- + const reachable = new Set(); + const bfsQueue = triggers.map((t) => t.id!).filter(Boolean); + for (const id of bfsQueue) reachable.add(id); + while (bfsQueue.length > 0) { + const current = bfsQueue.shift()!; + for (const e of outgoing.get(current) || []) { + const tgt = edgeTargetId(e); + if (tgt && !reachable.has(tgt)) { + reachable.add(tgt); + bfsQueue.push(tgt); + } + } + } + for (const n of nodes) { + if (widgets.has(n.id!) || reachable.has(n.id!)) continue; + result.warnings.push({ + severity: "warning", + rule: "orphan-node", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" is not reachable from any trigger`, + }); + } + + // ---- Rule: Dead ends ---- + for (const n of nodes) { + if (widgets.has(n.id!) || n.type === "TYPE_TRIGGER") continue; + if ((outgoing.get(n.id!) 
|| []).length > 0) continue; + if (TERMINAL_COMPONENTS.has(getComponentName(n))) continue; + result.warnings.push({ + severity: "warning", + rule: "dead-end", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" has no outgoing edges and is not a terminal component`, + }); + } + + // ---- Rule: Missing approval gate ---- + for (const n of nodes) { + if (widgets.has(n.id!)) continue; + const comp = getComponentName(n); + if (!DESTRUCTIVE_COMPONENTS.has(comp)) continue; + + const visited = new Set([n.id!]); + const rQueue = [n.id!]; + let found = false; + while (rQueue.length > 0 && !found) { + const cur = rQueue.shift()!; + for (const e of incoming.get(cur) || []) { + const srcId = edgeSourceId(e); + if (!srcId || visited.has(srcId)) continue; + visited.add(srcId); + const src = nodeById.get(srcId); + if (src && getComponentName(src) === "approval") { + found = true; + break; + } + rQueue.push(srcId); + } + } + if (!found) { + result.errors.push({ + severity: "error", + rule: "missing-approval-gate", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Destructive action "${comp}" in "${n.name}" has no upstream approval gate`, + }); + } + } + + // ---- Rule: Missing required config ---- + for (const n of nodes) { + const comp = getComponentName(n); + const config = (n.configuration || {}) as Record; + + switch (comp) { + case "claude.textPrompt": { + const prompt = typeof config.prompt === "string" ? 
config.prompt.trim() : ""; + if (!prompt) { + result.errors.push({ + severity: "error", + rule: "missing-required-config", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" (claude.textPrompt) is missing required "prompt" configuration`, + }); + } + break; + } + case "slack.sendTextMessage": { + if (!config.channel) { + result.warnings.push({ + severity: "warning", + rule: "missing-required-config", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" (slack.sendTextMessage) is missing "channel" configuration`, + }); + } + break; + } + case "merge": { + const inCount = (incoming.get(n.id!) || []).length; + if (inCount < 2) { + result.info.push({ + severity: "info", + rule: "missing-required-config", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" (merge) has ${inCount} incoming edge(s); merge typically expects 2 or more`, + }); + } + break; + } + case "filter": { + const expr = typeof config.expression === "string" ? config.expression.trim() : ""; + if (!expr) { + result.errors.push({ + severity: "error", + rule: "missing-required-config", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" (filter) is missing required "expression" configuration`, + }); + } + break; + } + case "http": { + if (!config.url) { + result.warnings.push({ + severity: "warning", + rule: "missing-required-config", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" (http) is missing "url" configuration`, + }); + } + break; + } + } + } + + // ---- Rule: Expression syntax validation ---- + for (const n of nodes) { + if (widgets.has(n.id!) 
|| !n.configuration) continue; + const strings = collectStrings(n.configuration); + for (const val of strings) { + const openCount = (val.match(/\{\{/g) || []).length; + const closeCount = (val.match(/\}\}/g) || []).length; + if (openCount !== closeCount) { + result.errors.push({ + severity: "error", + rule: "invalid-expression", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" has unbalanced expression delimiters: ${openCount} '{{' vs ${closeCount} '}}'`, + }); + } + + for (const pat of [NODE_REF_DOUBLE, NODE_REF_SINGLE]) { + pat.lastIndex = 0; + let m; + while ((m = pat.exec(val)) !== null) { + if (!nodeNames.has(m[1])) { + result.warnings.push({ + severity: "warning", + rule: "invalid-expression", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Node "${n.name}" references unknown node "${m[1]}"`, + }); + } + } + } + } + } + + // ---- Rule: Unreachable branches ---- + for (const n of nodes) { + if (getComponentName(n) !== "filter") continue; + const edges = outgoing.get(n.id!) || []; + const hasDefault = edges.some((e) => e.channel === "default"); + if (!hasDefault) { + result.info.push({ + severity: "info", + rule: "unreachable-branch", + nodeId: n.id || "", + nodeName: n.name || "", + message: `Filter node "${n.name}" has no "default" channel outgoing edge; matched events have nowhere to go`, + }); + } + } + + // Compute counts and quality score. + result.errorCount = result.errors.length; + result.warningCount = result.warnings.length; + result.infoCount = result.info.length; + result.status = result.errorCount > 0 ? "fail" : "pass"; + + const qs = computeQualityScore(result.errorCount, result.warningCount, result.infoCount); + result.qualityScore = qs.score; + result.qualityGrade = qs.grade; + + return result; +}