maxritter · maxritter · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
@@ -22,6 +22,9 @@ RUN rm -rf /var/lib/apt/lists/* && \
         vim \
         netcat-openbsd \
         socat \
+        bubblewrap \
+        iptables \
+        ipset \
         chromium && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -8,6 +8,9 @@
     "${localWorkspaceFolderBasename}"
   ],
   "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
+  "mounts": [
+    "source=claude-code-config-${devcontainerId},target=/root/.claude,type=volume"
+  ],
   "customizations": {
     "vscode": {
       "extensions": [

diff --git a/README.md b/README.md
diff --git a/console/package.json b/console/package.json
@@ -35,8 +35,12 @@
   "devDependencies": {
     "@git-diff-view/file": "^0.1.1",
     "@git-diff-view/react": "^0.1.1",
+    "@happy-dom/global-registrator": "^20.9.0",
     "@iconify/react": "^6.0.2",
     "@tailwindcss/vite": "^4.1.18",
+    "@testing-library/dom": "^10.4.1",
+    "@testing-library/react": "^16.3.2",
+    "@testing-library/user-event": "^14.6.1",
     "@types/bun": "^1.3.8",
     "@types/cookie-parser": "^1.4.10",
     "@types/cors": "^2.8.19",

diff --git a/console/src/ui/viewer/hooks/useSettings.ts b/console/src/ui/viewer/hooks/useSettings.ts
diff --git a/console/src/ui/viewer/views/Settings/index.tsx b/console/src/ui/viewer/views/Settings/index.tsx
diff --git a/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx b/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx
diff --git a/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts b/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts
diff --git a/console/src/ui/viewer/views/Spec/index.tsx b/console/src/ui/viewer/views/Spec/index.tsx
diff --git a/console/src/ui/viewer/views/Spec/parsePlanContent.ts b/console/src/ui/viewer/views/Spec/parsePlanContent.ts
diff --git a/console/tests/annotation/plan-annotator-persistence.test.tsx b/console/tests/annotation/plan-annotator-persistence.test.tsx
diff --git a/console/tests/ui/spec-section-rendering.test.ts b/console/tests/ui/spec-section-rendering.test.ts
diff --git a/console/tsconfig.json b/console/tsconfig.json
@@ -25,7 +25,8 @@
   "include": [
     "src/**/*.ts",
     "src/**/*.tsx",
-    "tests/**/*.ts"
+    "tests/**/*.ts",
+    "tests/**/*.tsx"
   ],
   "exclude": [
     "node_modules",

diff --git a/docs/docusaurus/docs/getting-started/installation.md b/docs/docusaurus/docs/getting-started/installation.md
@@ -49,6 +49,13 @@ When enabled, Codex provides an independent adversarial review during `/spec` pl
 
 Pilot Shell works inside Dev Containers. Copy the `.devcontainer` folder from the [Pilot Shell repository](https://github.com/maxritter/pilot-shell/tree/main/.devcontainer) into your project, adapt it to your needs (base image, extensions, dependencies), and run the installer inside the container. The installer auto-detects the container environment and skips system-level dependencies like Homebrew.
 
+For tighter isolation when working with untrusted code, layer Claude Code's [`/sandbox`](https://code.claude.com/docs/en/sandboxing) on top — `bubblewrap`, `socat`, `iptables`, and `ipset` are pre-installed in the Dockerfile so it works out of the box on Linux.
+
+### Further reading
+
+- [Claude Code · Development containers](https://code.claude.com/docs/en/devcontainer) — Anthropic's reference container, persistent volumes, organization policy, network egress, the `--dangerously-skip-permissions` flag.
+- [Claude Code · Sandboxing](https://code.claude.com/docs/en/sandboxing) — Seatbelt (macOS) and bubblewrap (Linux/WSL2), `/sandbox` modes, `allowedDomains`, filesystem allow/deny rules, security limitations.
+
 ## Install Specific Version
 
 ```bash

diff --git a/docs/docusaurus/docs/workflows/fix.md b/docs/docusaurus/docs/workflows/fix.md
@@ -0,0 +1,106 @@
+---
+sidebar_position: 5
+title: /fix
+description: Bugfix workflow — investigate, RED test, fix, verify end-to-end, done.
+---
+
+# /fix
+
+Bugfix workflow with TDD. Investigates the bug, writes a failing test, fixes at the root cause, **verifies end-to-end against the running program**, finishes. No plan file, no approval mid-flow, no separate verify phase.
+
+Use `/fix` for bugs. Use [`/spec`](/docs/workflows/spec) for features and architectural changes — including bugfixes that warrant a full plan with approval and code review.
+
+```bash
+$ pilot
+> /fix "annotation persistence drops fields between save and reload"
+> /fix "off-by-one in pagination at boundary"
+> /fix "wrong default for max_retries"
+```
+
+`/fix` is **always quick**. If investigation reveals the bug is multi-component, architectural, or otherwise larger than a quick fix, `/fix` stops cleanly and tells you to re-invoke with `/spec`. It does not silently switch lanes.
+
+## Workflow
+
+```text
+Investigate  →  RED  →  Fix  →  Verify End-to-End  →  Quality Gate  →  Done
+```
+
+### Investigate
+
+Trace the bug to `file:lineN — function() does X but should do Y` with **High** or **Medium** confidence. For UI / async / race / timing bugs that don't surface from a static read, add temporary `SPEC-DEBUG:`-marked logs at component boundaries before tracing. If confidence stays **Low**, `/fix` bails out and points you to `/spec`.
+
+### RED — Write the Reproducing Test
+
+Encode `Currently → Expected` via an existing public entry point. Run it; it must **fail** with an error matching the symptom. A test that passes against buggy code doesn't encode the bug.
+
+### Fix at the Root Cause
+
+Minimal change at the root cause. Symptom patches (`try/except` hiding the bug, swallowed returns, silently normalised inputs) are forbidden. Re-run the reproducing test → must pass. Run the targeted test module(s).
+
+A diff sanity check follows: root-cause file IS in the diff, no unplanned files, < 20 lines typically. A grep over the diff catches symptom-patching and leftover `print` / `console.log` / `SPEC-DEBUG:` markers — every match must be justified or reverted.
+
+### Verify End-to-End
+
+The primary correctness signal. Run the actual program with the original input and observe the symptom is gone — a passing unit test alone is never accepted. This step is mandatory.
+
+| Bug surface | Tool | Evidence |
+| --- | --- | --- |
+| **UI / web** | 4-tier browser stack: **Claude Code Chrome** → **Chrome DevTools MCP** → **playwright-cli** → **agent-browser** | Page state, element values |
+| **CLI** | The exact command the user ran | Stdout, exit code |
+| **HTTP API** | `curl` / HTTP client with the user's body | Status code, response field |
+| **Library / SDK / function** | `python -c '…'`, `node -e '…'`, REPL, scratch script | Returned value |
+| **Background job** | Trigger manually with the failing input | Logs |
+
+The completion report must include concrete evidence — bare assertions ("looks fixed", "tests pass") are insufficient. If the symptom persists, the unit test is at the wrong layer: move the assertion up to the user's actual entry point and re-run RED → Fix → Verify End-to-End.
+
+### Quality Gate
+
+Lint + types + build (when applicable), then the full anti-regression suite, once. If a far-from-the-fix test breaks, the bug has unintended cross-coupling — bail out to `/spec`.
+
+### Finalise
+
+Worktree mode: bundle test + fix into one `fix:` commit. Approval gate fires only if **Plan Approval** is enabled. The completion report includes a mandatory **E2E** line documenting what was actually run.
+
+## When to bail out — use `/spec` instead
+
+`/fix` stops and tells you to re-invoke with `/spec` when:
+
+- Bug spans 3+ files or 2+ components.
+- Root cause is architectural, not a single line.
+- Fix needs defense-in-depth at multiple layers.
+- Confidence stays Low — root cause can't be pinned to file:line.
+- Two failed fix attempts.
+- Fix has non-trivial UI implications that warrant a recorded Verification Scenario.
+
+The full lane (`/spec`) adds: Behavior Contract, three-task structure, plan file with approval gate, Console annotation cycle, `cp`+`trap` revert-test proof in verify, iteration cap at 3.
+
+## Common issues
+
+| Symptom | What it means | What to do |
+| --- | --- | --- |
+| Can't reproduce | Description too vague or environment-dependent | Ask for exact steps, env, stack trace. Don't write a speculative fix. |
+| Test passes without the fix | Test doesn't encode the bug | Tighten the assertion or pick a more specific input. |
+| Fix breaks far-away tests | Cross-coupling beyond the quick lane | Bail out. Re-invoke with `/spec`. |
+| Reproducing test green but user still hits the bug | Test sits below the user's layer | Move the assertion up and re-run RED → Fix → Verify End-to-End. |
+| Two failed fix attempts | Architectural problem, not a fix problem | Bail out. The pattern needs reconsidering, not another patch. |
+
+## Configurable Toggles
+
+`/fix` honours the same Console Settings as `/spec`:
+
+| Toggle | Default | Effect when disabled |
+| --- | --- | --- |
+| **Ask Questions** | On | Investigation skips clarifying questions and uses defaults. |
+| **Plan Approval** | On | The end-of-flow approval gate is skipped. |
+
+When both are off, `/fix` runs end-to-end with no user interaction. Worktree isolation is not honoured — use `/spec` if you want a worktree.
+
+## When to use `/spec` vs `/fix`
+
+| Use `/fix` | Use `/spec` |
+| --- | --- |
+| Something is broken | Building new functionality |
+| You want a fix without ceremony | Architecture or design decision matters |
+| You want it done now | Work warrants a written plan + approval |
+
+`/fix` handles the full range — from typos to multi-step debugging. It bails out and points to `/spec` only when complexity is truly architectural (multiple components, defense-in-depth at multiple layers, repeated failed attempts).
diff --git a/docs/docusaurus/docs/workflows/spec.md b/docs/docusaurus/docs/workflows/spec.md
@@ -8,15 +8,14 @@ description: Plan, implement, and verify complex features with full automation u
 
 Plan, implement, and verify complex features with full automation using Spec-Driven Development.
 
-**Replaces Claude Code's built-in plan mode (Shift+Tab).** Best for complex features, refactoring tasks, or any work where you want to review a plan before implementation begins. The structured workflow prevents scope creep and ensures every task is tested and verified before being marked complete.
+**Replaces Claude Code's built-in plan mode (Shift+Tab).** Best for new features, refactoring, architectural changes — work where a plan and a design discussion add value before code. The structured workflow prevents scope creep and ensures every task is tested and verified before being marked complete.
 
-> **Tip:** For vague ideas or unclear requirements, use [`/prd`](/docs/workflows/prd) first to brainstorm back-and-forth and produce a PRD, then hand off to `/spec`.
+For bugfixes, use [`/fix`](/docs/workflows/fix). For vague ideas, use [`/prd`](/docs/workflows/prd) first to produce a PRD, then hand off to `/spec`.
 
 ```bash
 $ pilot
 > /spec "Add user authentication with OAuth and JWT tokens"
 > /spec "Migrate the REST API to GraphQL"
-> /spec "Fix the crash when deleting nodes with two children"  # bugfix auto-detected
 ```
 
 ## Workflow
@@ -38,18 +37,9 @@ Full exploration workflow for new functionality, refactoring, or any work where
 - Full plan with scope, risks, and Definition of Done
 - Unified verification agent (optional, configurable in Console Settings)
 
-### Bugfix Spec (auto-detected)
+### Bugfixes
 
-Investigation-first flow for targeted fixes. Finds the root cause before touching any code, then enforces a uniform three-task structure so every bugfix follows the same process — no freewheeling.
-
-- **Root cause tracing:** backward through the call chain to `file:line`, with CodeGraph caller/callee analysis
-- **Pattern analysis:** compare broken vs working code paths
-- **Behavior Contract:** every plan pins down `Given / When / Currently / Expected / Anti-regression` — the invariant the fix must produce and the behavior it must not break
-- **Three uniform tasks** (always, regardless of bug size):
-  1. **Write Reproducing Test (RED)** — must FAIL before any fix code exists
-  2. **Implement Fix at Root Cause** — reproducing test passes, full suite passes
-  3. **Quality Gate** — lint, type check, build, full suite green after any auto-fixes
-- **Verify audit:** authoritative full suite + always-on revert-test (proves the reproducing test would genuinely fail without the fix — rules out retroactive rubber-stamp tests) + root-cause-at-source audit (flags symptom patches and caller-side workarounds) + anti-regression spot-check — no sub-agents, tests carry the proof
+For a bugfix workflow without a plan file, use [`/fix`](/docs/workflows/fix). If you invoke `/spec` with a bug description instead, the full bugfix workflow runs — root-cause investigation, a three-task structure (RED test → fix → quality gate), a Behavior Contract audit, a revert-test proof in verify, and an iteration cap of 3.
 
 ## Three Phases
 
@@ -66,7 +56,7 @@ Investigation-first flow for targeted fixes. Finds the root cause before touchin
 - Isolated git worktree, new branch from default, or current branch (your choice)
 - Strict TDD for each task: RED → GREEN → REFACTOR
 - Quality hooks auto-lint, format, and type-check every edit
-- Full test suite after each task to catch regressions early
+- Full test suite runs once, at the **Quality Gate** task at the end — not after every task. Running it after each fix task is the single biggest token sink in bundled bugfix plans, so the targeted test module is used between fixes and the authoritative full-suite run happens once
 
 ### Verify Phase
 
@@ -100,6 +90,8 @@ When all three are disabled, `/spec` runs end-to-end without any user interactio
 
 Both reviewers run in a separate context window and don't consume the main session's context budget. Optional **Codex adversarial reviewers** (off by default) provide an independent second opinion using OpenAI Codex.
 
+**Codex runs at most once per `/spec` invocation.** Plan iterations (annotation feedback, verify re-runs, fixing prior findings) reuse the result of the first Codex review instead of re-launching — a sentinel file in the session directory enforces this. The bugfix planning phase no longer runs Codex at all; adversarial review is most valuable on real code, not on a plan.
+
 ## Branch Strategy (Optional)
 
 When starting a `/spec` task, you're asked how you want to work:
@@ -111,3 +103,5 @@ When starting a `/spec` task, you're asked how you want to work:
 | **New branch from default** | Fetches origin, creates `feat/<slug>` (or `fix/<slug>` for bugfixes) from `origin/main`, and checks it out. Best when your current branch isn't clean but you don't want full worktree isolation. |
 
 Disable the **Worktree Support** toggle in Console Settings to skip this question entirely — `/spec` will always use the current branch.
+
+For bugfixes, use [`/fix`](/docs/workflows/fix). `/fix` does not offer worktree isolation, so the branch strategy question is only asked by `/spec`.
diff --git a/docs/docusaurus/sidebars.ts b/docs/docusaurus/sidebars.ts
@@ -22,6 +22,7 @@ const sidebars: SidebarsConfig = {
         "workflows/create-skill",
         "workflows/prd",
         "workflows/spec",
+        "workflows/fix",
         "workflows/benchmark",
         "workflows/quick-mode",
       ],

diff --git a/docs/site/src/components/HeroSection.tsx b/docs/site/src/components/HeroSection.tsx
@@ -1,6 +1,5 @@
 import { GithubIcon, BookOpen } from "lucide-react";
 import { Button } from "@/components/ui/button";
-import { Badge } from "@/components/ui/badge";
 import Logo from "@/components/Logo";
 
 const HeroSection = () => {
@@ -26,73 +25,6 @@ const HeroSection = () => {
           </p>
         </div>
 
-        {/* Feature highlights */}
-        <div className="flex flex-wrap justify-center gap-3 xs:gap-4 sm:gap-6 mb-6 xs:mb-8 animate-fade-in-up animation-delay-400 px-2">
-          <div className="text-center">
-            <div className="text-lg xs:text-xl sm:text-2xl font-bold text-primary">
-              Spec-Driven
-            </div>
-            <div className="text-[9px] xs:text-[10px] sm:text-xs text-muted-foreground">
-              Plan → Build → Verify
-            </div>
-          </div>
-          <div className="w-px h-8 bg-border/50 hidden xs:block" />
-          <div className="text-center">
-            <div className="text-lg xs:text-xl sm:text-2xl font-bold text-primary">
-              TDD
-            </div>
-            <div className="text-[9px] xs:text-[10px] sm:text-xs text-muted-foreground">
-              Test-First
-            </div>
-          </div>
-          <div className="w-px h-8 bg-border/50 hidden xs:block" />
-          <div className="text-center">
-            <div className="text-lg xs:text-xl sm:text-2xl font-bold text-primary">
-              Memory
-            </div>
-            <div className="text-[9px] xs:text-[10px] sm:text-xs text-muted-foreground">
-              Persistent Context
-            </div>
-          </div>
-          <div className="w-px h-8 bg-border/50 hidden xs:block" />
-          <div className="text-center">
-            <div className="text-lg xs:text-xl sm:text-2xl font-bold text-primary">
-              Overlays
-            </div>
-            <div className="text-[9px] xs:text-[10px] sm:text-xs text-muted-foreground">
-              Modify Defaults
-            </div>
-          </div>
-          <div className="w-px h-8 bg-border/50 hidden xs:block" />
-          <div className="text-center">
-            <div className="text-lg xs:text-xl sm:text-2xl font-bold text-primary">
-              Hooks
-            </div>
-            <div className="text-[9px] xs:text-[10px] sm:text-xs text-muted-foreground">
-              Quality Gates
-            </div>
-          </div>
-        </div>
-
-        {/* Feature badges */}
-        <div className="flex flex-wrap justify-center gap-1.5 xs:gap-2 mb-6 xs:mb-8 animate-fade-in-up animation-delay-400 px-2">
-          <Badge variant="secondary" className="text-[10px] xs:text-xs">
-            Worktree Support
-          </Badge>
-          <Badge variant="secondary" className="text-[10px] xs:text-xs">
-            MCP Servers
-          </Badge>
-          <Badge variant="secondary" className="text-[10px] xs:text-xs">
-            LSP Servers
-          </Badge>
-          <Badge variant="secondary" className="text-[10px] xs:text-xs">
-            Semantic Search
-          </Badge>
-          <Badge variant="secondary" className="text-[10px] xs:text-xs">
-            Pilot Bot
-          </Badge>
-        </div>
-
         {/* CTA Buttons */}
         <div className="flex flex-col sm:flex-row items-center justify-center gap-2 sm:gap-4 animate-fade-in-up animation-delay-500 px-2">
           <Button

diff --git a/docs/site/src/components/InstallSection.tsx b/docs/site/src/components/InstallSection.tsx
@@ -94,7 +94,11 @@ const InstallSection = () => {
                   <code className="text-primary bg-primary/10 px-1.5 py-0.5 rounded">
                     /spec
                   </code>{" "}
-                  features &amp; bugfixes{" → "}
+                  features{" → "}
+                  <code className="text-primary bg-primary/10 px-1.5 py-0.5 rounded">
+                    /fix
+                  </code>{" "}
+                  bugfixes{" → "}
                   <code className="text-primary bg-primary/10 px-1.5 py-0.5 rounded">
                     /create-skill
                   </code>{" "}