diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9973c599..ec22b073 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -22,6 +22,9 @@ RUN rm -rf /var/lib/apt/lists/* && \ vim \ netcat-openbsd \ socat \ + bubblewrap \ + iptables \ + ipset \ chromium && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 61edaa29..166f25c0 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -8,6 +8,9 @@ "${localWorkspaceFolderBasename}" ], "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "mounts": [ + "source=claude-code-config-${devcontainerId},target=/root/.claude,type=volume" + ], "customizations": { "vscode": { "extensions": [ diff --git a/.githooks/pre-commit b/.githooks/pre-commit index ee29e9d9..94169813 100755 --- a/.githooks/pre-commit +++ b/.githooks/pre-commit @@ -32,6 +32,7 @@ fi # --- 1. Python unit tests --- LAUNCHER_CHANGED=$(git diff --cached --name-only -- 'launcher/' 'pilot/hooks/' | head -1) INSTALLER_CHANGED=$(git diff --cached --name-only -- 'installer/' | head -1) +BENCHMARK_CHANGED=$(git diff --cached --name-only -- 'pilot/skills/benchmark/' | head -1) if [ -n "$LAUNCHER_CHANGED" ]; then echo "[pre-commit] Python source changed — running unit tests..." @@ -45,6 +46,12 @@ if [ -n "$INSTALLER_CHANGED" ]; then echo "[pre-commit] Installer unit tests passed." fi +if [ -n "$BENCHMARK_CHANGED" ]; then + echo "[pre-commit] Benchmark skill changed — running benchmark unit tests..." + uv run pytest pilot/skills/benchmark/tests/ -q --tb=short 2>&1 | tail -20 + echo "[pre-commit] Benchmark unit tests passed." +fi + # --- 2. 
Console unit tests --- CONSOLE_CHANGED=$(git diff --cached --name-only -- 'console/src/' 'console/scripts/' 'console/package.json' 'console/tsconfig.json' 'console/vite.config.ts' | head -1) diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 412cbbeb..317ba3d4 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -91,7 +91,7 @@ jobs: - name: Run unit tests with coverage run: | - python3 -m pytest installer/tests/unit/ launcher/tests/unit/ -v \ + python3 -m pytest installer/tests/unit/ launcher/tests/unit/ pilot/skills/benchmark/tests/ -v \ --cov=installer --cov=launcher \ --cov-report=term --cov-report=xml @@ -117,7 +117,7 @@ jobs: - name: Setup Bun uses: oven-sh/setup-bun@3d267786b128fe76c2f16a390aa2448b815359f3 # v2 with: - bun-version: latest + bun-version: "1.3.11" - name: Install dependencies working-directory: console @@ -154,7 +154,7 @@ jobs: - name: Setup Bun uses: oven-sh/setup-bun@3d267786b128fe76c2f16a390aa2448b815359f3 # v2 with: - bun-version: latest + bun-version: "1.3.11" - name: Install dependencies working-directory: console diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 641bf226..12d199ae 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -140,7 +140,7 @@ jobs: - name: Run unit tests with coverage run: | - python3 -m pytest installer/tests/unit/ launcher/tests/unit/ pilot/hooks/tests/ -v \ + python3 -m pytest installer/tests/unit/ launcher/tests/unit/ pilot/hooks/tests/ pilot/skills/benchmark/tests/ -v \ --cov=installer --cov=launcher --cov=pilot.hooks \ --cov-report=term --cov-report=xml @@ -166,7 +166,7 @@ jobs: - name: Setup Bun uses: oven-sh/setup-bun@3d267786b128fe76c2f16a390aa2448b815359f3 # v2 with: - bun-version: latest + bun-version: "1.3.11" - name: Install dependencies working-directory: console @@ -203,7 +203,7 @@ jobs: - name: Setup Bun uses: 
oven-sh/setup-bun@3d267786b128fe76c2f16a390aa2448b815359f3 # v2 with: - bun-version: latest + bun-version: "1.3.11" - name: Install dependencies working-directory: console diff --git a/.gitignore b/.gitignore index 9cad3a7d..146f7deb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,15 @@ __pycache__/ *.py[codz] *$py.class +# Generated skill artifacts — SKILL.md and hashes.json are produced by +# `pilot skill-build` at install time from manifest.json + steps/*.md. +# They must never be committed from the source tree. +pilot/skills/*/SKILL.md +pilot/skills/*/hashes.json + +# Local reference clones (not part of this repo) +claude-pace/ + # C extensions *.so @@ -225,6 +234,8 @@ __marimo__/ scheduled_tasks.lock CLAUDE.md AGENTS.md +benchmarks/** +skill-creator spec-annotate changes-header apm.yml @@ -233,6 +244,8 @@ apm.yml .agents .codegraph .vercel +.codex +.mypy_cache .mcp.json demo skills-lock.json @@ -269,3 +282,5 @@ bun.lock # Kiro .kiro/ .worktrees/ +.env*.local +.omx/ diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a64fc6..17f01a37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,155 @@ All notable changes to Pilot Shell will be documented in this file. 
+## [8.4.1] - 2026-04-27 + +### Bug Fixes + +- Native usage analytics, ccusage scrub, hook + rules + site updates + +## [8.4.0] - 2026-04-24 + +### Features + +- Add /benchmark skill and evaluation framework + +## [8.3.0] - 2026-04-23 + +### Features + +- Cross-platform statusline usage, in-process skill build, console task cards + +## [8.2.4] - 2026-04-21 + +### Bug Fixes + +- Prevent vector-db disk exhaustion + model routing UI + project-scoped annotations +- **hooks:** Make SessionEnd fully non-blocking so harness cancellation can't leak workers + +## [8.2.3] - 2026-04-17 + +### Bug Fixes + +- Drop skill banner, normalize step heading levels across workflow skills + +### Miscellaneous + +- Reframe customization docs and rework site hero/pillars + +## [8.2.2] - 2026-04-17 + +### Bug Fixes + +- Updated inconsistent steps in spec workflow + +## [8.2.1] - 2026-04-17 + +### Bug Fixes + +- Removed static effort for skills/commands to adjust based on global +- Customization overrides, skill decomposition polish, generated artifacts untracked + +## [8.2.0] - 2026-04-16 + +### Features + +- Customization packs, Opus 4.7 support, and Codex bugfixes + +## [8.1.0] - 2026-04-15 + +### Features + +- Add session details with JSONL stats, cost calculation, and enhanced UI + +## [8.0.10] - 2026-04-14 + +### Bug Fixes + +- Codegraph native SQLite repair, /spec new-branch option, session prompt display + +## [8.0.9] - 2026-04-13 + +### Bug Fixes + +- Deduplicate plugin extensions across marketplaces and add progressive dashboard loading + +## [8.0.8] - 2026-04-13 + +### Bug Fixes + +- Add Chrome DevTools MCP plugin, update browser automation to 4-tier, fix auto-mode flag and docs + +### Miscellaneous + +- Updated Demo Gif +- Updated Readme + +## [8.0.7] - 2026-04-10 + +### Bug Fixes + +- Restore dom globals after terminal-preview-xss test to prevent portal ssr leak +- Stop incomplete child_process mocks poisoning CI test runs +- Add tool output compression + sandboxed content 
cards to Usage view +- Hook venv sync, console annotation writes, codegraph native sqlite + +## [8.0.6] - 2026-04-09 + +### Bug Fixes + +- Walk up directory tree for git repo detection in CodeGraph guard + +## [8.0.5] - 2026-04-09 + +### Bug Fixes + +- Skip CodeGraph indexing in non-git directories + +### Miscellaneous + +- Updated Readme +- Improved Readme + +## [8.0.4] - 2026-04-09 + +### Bug Fixes + +- Improved PRD and Spec Sharing / Annotation UI on pilot-shell.com +- Improved Dependency Installation Speed for Installer and Updater + +## [8.0.3] - 2026-04-09 + +### Bug Fixes + +- Improved Code Reviewer Stability, Codegraph Usage and Context Mode + +### Miscellaneous + +- Updated specifications images + +## [8.0.2] - 2026-04-09 + +### Bug Fixes + +- Console dashboard overhaul — 2x2 recent cards, usage model breakdown, session filtering, spec tab consistency + +## [8.0.1] - 2026-04-08 + +### Bug Fixes + +- Use correct CronCreate parameter name (cron, not schedule) in bot skills + +## [8.0.0] - 2026-04-08 + +### Bug Fixes + +- Pin Bun to 1.3.11 (1.2.15 lacks compression APIs, latest is unstable) + +### Features + +- Add Pilot Bot — persistent automation agent with scheduled tasks, background jobs, and optional Telegram +- Complete Console overhaul — v8.0.0 + ## [7.11.4] - 2026-04-07 ### Bug Fixes diff --git a/README.md b/README.md index f0920340..68ded98e 100644 --- a/README.md +++ b/README.md @@ -2,27 +2,30 @@ Pilot Shell -**The professional development environment for [Claude Code](https://docs.anthropic.com/en/docs/claude-code)** +### The Claude Code Engineering Platform. -### Claude Code is powerful. Pilot Shell makes it reliable. - -From requirement to production-grade code. Planned, tested, verified.
-**Tests enforced. Context optimized. Quality automated.** +From requirement to production-grade code — planned, tested, verified.
+**Spec-driven plans. Enforced quality gates. Persistent knowledge.** [![Stars](https://img.shields.io/github/stars/maxritter/pilot-shell?style=flat&color=F59E0B)](https://github.com/maxritter/pilot-shell/stargazers) [![Star History](https://img.shields.io/badge/Star_History-chart-8B5CF6)](https://star-history.com/#maxritter/pilot-shell&Date) [![Downloads](https://img.shields.io/github/downloads/maxritter/pilot-shell/total?color=3B82F6)](https://github.com/maxritter/pilot-shell/releases) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-10B981.svg)](https://github.com/maxritter/pilot-shell/pulls) -⭐ [Star this repo](https://github.com/maxritter/pilot-shell) · 🌐 [Website](https://pilot-shell.com) · 📖 [Documentation](https://pilot-shell.com/docs) · 🆕 [Updates](https://www.linkedin.com/in/rittermax/) · 📋 [Changelog](https://pilot.openchangelog.com/) - -
+

+ Install • + Features • + Console • + Docs • + Website • + Changelog +

```bash curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install.sh | bash ``` -**Works on macOS, Linux, and Windows (WSL2).** +**macOS · Linux · Windows (WSL2)** — installs in under 2 minutes.
@@ -32,17 +35,30 @@ curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install. --- -## Why I Built This +--- + +## Why Pilot Shell -**Claude Code writes code fast**. But without structure, it skips tests, loses context, and produces inconsistent results — especially on complex, established codebases where there are real conventions to follow and real regressions to catch. I tried other frameworks. Most of them add complexity — dozens of agents, elaborate scaffolding, thousands of lines of instruction files — but the output doesn't get a lot better. You just burn more tokens, wait longer and have to deal with a more complex setup. +**Claude Code writes code fast** — but without structure, it skips tests, loses context, and produces inconsistent results. Other frameworks add complexity (dozens of agents, thousands of lines of config) without meaningfully better output. -**So I built Pilot Shell**. Spec-driven development plans, implements, and verifies features end-to-end. Context engineering preserves decisions and knowledge across sessions. Quality hooks enforce linting, formatting, type checking, and TDD on every edit — not as suggestions, but as gates. Semantic search and a code knowledge graph give Claude deep codebase understanding. Token optimization cuts costs by 60–90%. A modular extension system with team sharing makes everything reusable. MCP servers and language servers provide real-time diagnostics and library docs. Every component exists because I hit a real problem — and solved it structurally. +**Pilot Shell is different.** Every component solves a real problem: -**This isn't a vibe coding tool**, it's agentic engineering that produces production-grade results. You install it once, run `pilot` in any project, then `/setup-rules` to generate your project rules. Use `/prd` to brainstorm and turn vague ideas into clear requirements, then `/spec` to plan, implement, and verify — when it's done, the work is tested and ready to ship. 
As patterns emerge, `/create-skill` captures your workflows so they're reusable across projects. +- **`/spec`** — plans, implements, and verifies features end-to-end with TDD +- **`/fix`** — bugfix workflow with RED-before-GREEN discipline; bails out when complexity exceeds the standard fix lane +- **`/prd`** — brainstorm ideas into clear requirements, with optional deep research +- **Quality hooks** — enforce linting, formatting, type checking, and tests as quality gates +- **Context engineering** — preserves decisions and knowledge across sessions +- **Code intelligence** — semantic search (Probe) + code knowledge graph (CodeGraph) +- **Token optimization** — 60–90% cost reduction via RTK and context-mode +- **Extensions** — reusable rules, skills, and MCP servers with team sharing and [customization](https://pilot-shell.com/docs/features/customization) +- **Console** — local web dashboard with real-time notifications and session management +- **Pilot Bot** — persistent automation agent with scheduled tasks and background jobs + +Run `pilot` for Spec-Driven Development with `/spec`, or `pilot bot` for 24/7 automations. --- -## Getting Started +

Getting Started

### Prerequisites @@ -50,9 +66,11 @@ curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install. **Claude Subscription:** Solo developers should choose [Max 5x](https://claude.com/pricing) for moderate usage or [Max 20x](https://claude.com/pricing) for heavy usage. Teams should use [Team Premium](https://claude.com/pricing) (6.25x usage per member, SSO, admin tools, billing management). Companies with stricter compliance or procurement requirements should use [Enterprise](https://claude.com/pricing) (API based pricing applies per usage). -**Chrome Extension (Recommended):** Install the [Claude Code Chrome extension](https://code.claude.com/docs/en/chrome) for browser automation and E2E testing. Pilot automatically detects it and uses it as the preferred tool. When Chrome isn't available, Pilot falls back to [playwright-cli](https://github.com/microsoft/playwright-cli) (reliable element targeting, persistent sessions, tracing) or [agent-browser](https://agent-browser.dev/) (lightweight, fast startup). +**Terminal (Recommended):** [cmux](https://cmux.com) works great with Pilot Shell — its vertical tab layout lets you run multiple sessions side by side. Any modern terminal works: [Ghostty](https://ghostty.org/), [iTerm2](https://iterm2.com/), or the built-in macOS/Linux terminal. + +**Claude Chrome (Recommended):** Install the [Claude Code Chrome extension](https://code.claude.com/docs/en/chrome) for browser automation and E2E testing. Pilot automatically detects it and uses it as the preferred tool. When the extension isn't available, Pilot falls back to [Chrome DevTools MCP](https://github.com/anthropics/chrome-devtools-mcp) (direct CDP access, Lighthouse, performance tracing), then [playwright-cli](https://github.com/microsoft/playwright-cli) (persistent sessions, tracing) or [agent-browser](https://agent-browser.dev/) (lightweight, fast startup). 
-**Codex Plugin (Optional):** Install the [Codex plugin](https://github.com/openai/codex-plugin-cc) for adversarial code review powered by OpenAI Codex. When enabled in Console Settings, Codex provides an independent second opinion during `/spec` planning and verification phases. A [ChatGPT Plus](https://chatgpt.com/#pricing) subscription ($20/mo) covers the Codex API usage needed for code reviews. +**Codex Plugin (Included):** The [Codex plugin](https://github.com/openai/codex-plugin-cc) is installed automatically with Pilot. It provides adversarial code review powered by OpenAI Codex — an independent second opinion during `/spec` planning and verification. Run `/codex:setup` once to authenticate, then enable reviewers in Console Settings → Reviewers. A [ChatGPT Plus](https://chatgpt.com/#pricing) subscription ($20/mo) covers the Codex API usage. ### Installation @@ -64,6 +82,36 @@ curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install. Installs globally on macOS, Linux, and Windows (WSL2). All tools and rules go to `~/.pilot/` and `~/.claude/`. After installation, `cd` into any project and run `pilot` or `ccp` to start. +
+Downgrade + +If you encounter an issue or unfixed bug in the latest version, you can always go back to a previous version (see [releases](https://github.com/maxritter/pilot-shell/releases)): + +```bash +export VERSION=8.4.1 +curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install.sh | bash +``` +
+ +
+Uninstalling + +Removes the Pilot binary, plugin files, managed commands/rules, settings and shell aliases: + +```bash +curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/uninstall.sh | bash +``` +
+ +
+Using a Dev Container + +Pilot Shell works inside Dev Containers. Copy the [`.devcontainer`](https://github.com/maxritter/pilot-shell/tree/main/.devcontainer) folder from this repository into your project, adapt it to your needs (base image, extensions, dependencies), and run the installer inside the container. The installer auto-detects the container environment and skips system-level dependencies like Homebrew. + +For tighter isolation when working with untrusted code, combine the dev container with Claude Code's [`/sandbox`](https://code.claude.com/docs/en/sandboxing) — `bubblewrap`, `socat`, `iptables`, and `ipset` are pre-installed in the Dockerfile so it works out of the box on Linux. See Anthropic's [development containers](https://code.claude.com/docs/en/devcontainer) and [sandboxing](https://code.claude.com/docs/en/sandboxing) docs for hardening patterns (egress allowlist, managed settings, persistent volumes). + +
+
What the installer does @@ -72,40 +120,128 @@ Installs globally on macOS, Linux, and Windows (WSL2). All tools and rules go to 1. **Prerequisites** — Checks/installs Homebrew, Node.js, Python 3.12+, uv, git, jq 2. **Claude files** — Sets up `~/.claude/` plugin — rules, commands, hooks, MCP servers 3. **Config files** — Creates `.nvmrc` and project config -4. **Dependencies** — Installs Probe, RTK, CodeGraph, [playwright-cli](https://github.com/microsoft/playwright-cli), [agent-browser](https://agent-browser.dev/), language servers +4. **Dependencies** — Installs Probe, RTK, CodeGraph, context-mode (better-sqlite3), [Chrome DevTools MCP](https://github.com/anthropics/chrome-devtools-mcp), [playwright-cli](https://github.com/microsoft/playwright-cli), [agent-browser](https://agent-browser.dev/), language servers 5. **Shell integration** — Auto-configures bash, fish, and zsh with `pilot` alias 6. **VS Code extensions** — Installs recommended extensions for your stack 7. **Finalize** — Success message with next steps
-### Installing a Specific Version +--- + +

How It Works

+ +Just chat — no plan, no approval gate. [Quick mode](https://pilot-shell.com/docs/workflows/quick-mode) is the default: quality hooks and TDD enforcement still apply, best for small tasks and exploration. For anything that needs a plan, use `/spec` — not Claude Code's built-in plan mode. + +### /spec — Spec-Driven Development (features) -Pin to a specific release (see [releases](https://github.com/maxritter/pilot-shell/releases)): +**[`/spec`](https://pilot-shell.com/docs/workflows/spec) replaces Claude Code's built-in plan mode** (Shift+Tab) for new features, refactoring, and architectural work. It provides a complete planning workflow with TDD, verification, and code review. + +For bugs, use [`/fix`](https://pilot-shell.com/docs/workflows/fix) (see below). Specs are saved to `docs/plans/` and visible in the Console's **Specification** tab. ```bash -export VERSION=7.11.4 -curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install.sh | bash +pilot +> /spec "Add user authentication with OAuth and JWT tokens" +> /spec "Migrate the REST API to GraphQL" ``` -### Uninstalling +``` +Discuss → Plan → Approve → Implement (TDD) → Verify → Done + ↑ ↓ + └── Loop──┘ +``` -Removes the Pilot binary, plugin files, managed commands/rules, settings and shell aliases: +Pilot Shell Console — Specifications + +
+Feature Mode + +Full exploration workflow for new functionality, refactoring, or architectural changes. + +**Plan:** Explores codebase with semantic search → asks clarifying questions → writes detailed spec with scope, tasks, and definition of done → for UI features, writes **E2E test scenarios** (step-by-step, browser-executable) that become the verification contract → **spec-review sub-agent** validates completeness → waits for your approval. Optional **Codex adversarial review** provides an independent second opinion when enabled. + +**Implement:** Creates an isolated git worktree → implements each task with strict TDD (RED → GREEN → REFACTOR) → quality hooks auto-lint, format, and type-check every edit → full test suite after each task. + +**Verify:** Full test suite + actual program execution → **unified review sub-agent** (compliance + quality + goal) → for UI features, executes each E2E scenario step-by-step via browser automation (pass/fail tracked, results written to plan) → auto-fixes findings → squash merges to main on success. + +
+ +### /fix — Bugfix Workflow + +**[`/fix`](https://pilot-shell.com/docs/workflows/fix) is the bugfix command.** Investigate the bug, write the failing test, fix at the root cause, single-pass audit, done. No plan file, no approval mid-flow, no separate verify phase. ```bash -curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/uninstall.sh | bash +pilot +> /fix "annotation persistence drops fields between save and reload" +> /fix "off-by-one in pagination at boundary" +> /fix "wrong default for max_retries" +``` + +``` +Investigate → RED → Fix → Audit → Quality Gate → Done ``` +If investigation reveals the bug is multi-component or architectural, `/fix` stops cleanly and tells you to re-invoke with `/spec`. `/fix` is always quick; `/spec` is the full workflow. +
-Dev Container +How /fix works -Pilot Shell works inside Dev Containers. Copy the [`.devcontainer`](https://github.com/maxritter/pilot-shell/tree/main/.devcontainer) folder from this repository into your project, adapt it to your needs (base image, extensions, dependencies), and run the installer inside the container. The installer auto-detects the container environment and skips system-level dependencies like Homebrew. +For local bugs. Single file, obvious-once-traced root cause. No plan file, no approval mid-flow, no separate verify phase. RED-before-GREEN discipline still enforced — bugfixes without a failing test don't ship. + +- **Investigate:** Reproduce the bug → trace to root cause at `file:line` with `codegraph_context` + targeted reads → state confidence (High/Medium required to proceed). For UI / async / race bugs that don't surface from a static read, add temporary `SPEC-DEBUG:`-marked logs at component boundaries before tracing. +- **RED:** Write the failing test via an existing public entry point → run, must fail with the documented symptom +- **Fix:** Minimal change at the root cause. Symptom patches (`try/except` hiding the bug, swallowed returns) are forbidden. Targeted test module re-runs between fix iterations — full suite runs once at the Quality Gate, not per-fix-task. +- **Audit:** Single-pass scope sanity + symptom-patching grep + **mandatory end-to-end verification** — re-runs the user's actual repro against the running program (Claude Code Chrome → Chrome DevTools MCP → playwright-cli → agent-browser for UI; CLI/API/REPL for non-UI). A passing unit test alone is never accepted as proof; concrete evidence (command + observation) is required in the completion report. 
+- **Quality Gate:** Lint + types + build + full anti-regression suite, once +- **Bail-out:** If investigation reveals the bug is multi-component, architectural, needs defense-in-depth at multiple layers, or two fix attempts have failed, `/fix` stops cleanly and tells you to re-invoke with `/spec`. It does not silently switch lanes.
---- +
+How /spec handles bugs + +When you type `/spec ""`, the full bugfix workflow runs — for bugs that warrant a written plan, approval, code review, and the full verify ceremony. + +- **Behavior Contract:** every plan pins down `Given / When / Currently / Expected / Anti-regression` — the invariant the fix must produce and the behavior it must not break +- **Three uniform tasks** (always, regardless of bug size): Write Reproducing Test (RED) → Implement Fix at Root Cause → Quality Gate +- **Verify audit:** always-on `cp`+`trap` revert-test (proves the reproducing test would genuinely fail without the fix — rules out retroactive rubber-stamp tests) + root-cause-at-source audit (flags symptom patches and caller-side workarounds) + original-symptom re-check — no sub-agents, tests carry the proof +- **Iteration cap at 3:** after three failed verify cycles, the workflow stops and asks if the bug is architectural rather than letting you loop forever + +
+ +### Status Line + +Pilot Shell ships with its own advanced status line, showing real-time session metrics and spec progress: + +Pilot Shell Status Line + +
+All fields explained + +**Line 1 — Session Metrics** (separated by `|`): + +| Widget | Description | +| ----------------- | ------------------------------------------------------------------------------- | +| **Model** | Active model in short form (`Opus 4.7 [1M]`, `Sonnet 4.6`) | +| **Context** | Effective context usage with progress bar and buffer indicator. Green < 80%, Yellow 80–95%, Red 95%+ | +| **Lines changed** | `+added -removed` in session (hidden when `rate_limits` is available) | +| **Git** | Branch with staged (`+N`) / unstaged (`~N`) counts (hidden when `rate_limits` is available) | +| **5h / 7d usage** | Rate-limit percentage with pacing arrow and reset countdown (`5h: 42% ⇡ 2h`). ⇡ red = over pace, ⇣ green = under pace. Read cross-platform from Claude Code's `rate_limits` stdin field (Pro/Max subscriptions on Claude Code 2.1.80+). Replaces lines+git when present. | +| **Cost** | Session cost in USD. Green < $1, Yellow $1–5, Red $5+. Hidden when `rate_limits` is available — on Pro/Max the subscription covers API usage, so the dollar figure is noise. | +| **Savings** | Token savings percentage from RTK proxy (`Savings: N%`). Always shown when RTK has data. | -## How It Works +**Line 2 — Mode:** + +- **Quick Mode:** `Quick Mode` +- **Spec Mode:** Plan name, type (`feature`/`bugfix`), phase (`plan`/`implement`/`verify`), progress bar, task count, and iteration count + +**Line 3 — Version & Session Info:** + +`Pilot () · CC () · sessions · memories ` + +Pilot tier: Solo, Team, or Trial with time remaining. Claude subscription (Pro/Max/Team/Enterprise) detected via `claude auth status` and cached for 24 hours. + +
### Pilot Shell Console @@ -116,17 +252,20 @@ A local web dashboard with different views and real-time notifications when Clau
All views +Each view with project-specific data has an inline **Project Filter** dropdown — switch projects without leaving the page. Dashboard stats tiles are clickable — navigate directly to the relevant view. + | View | What it shows | | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | -| **Dashboard** | Workspace status, active sessions, spec progress, git info, recent activity | -| **Specification** | All spec plans with task progress, phase tracking, and iteration history. **Annotate mode** lets you mark up plans visually before approving — select text or click **+** on any block to write a note. **Share with Teammate** generates a compressed share link; **Receive Feedback** imports their annotations with accept/reject controls | -| **Extensions** | All extensions — local, plugin, and remote — with team sharing via git, diff view, push/pull, and color-coded categories | +| **Dashboard** | Global command center — 8 clickable stat cards, 4 recent cards (Specifications, Requirements, Sessions, Memories) with "Show all" links. Active specs as pills in the top bar, notification bell in top right. | +| **Sessions** | Browse past sessions with search. Copy the session ID and use `/resume ` in Claude Code to jump back into any session. | +| **Memories** | Browsable observations — decisions, discoveries, bugfixes — with type filters and search. Each memory shows its session — click to navigate. | +| **Requirements** | PRD documents with view/annotate modes. Selected shown as a tab, others in a Previous dropdown. | +| **Specifications** | All spec plans with task progress, phase tracking, and iteration history. **Annotate mode** lets you mark up plans visually before approving — select text or click **+** on any block to write a note. 
**Share with Teammate** generates a compressed share link; **Receive Feedback** imports their annotations with accept/reject controls | +| **Extensions** | All extensions — local, plugin, and remote — with team sharing via git, diff view, push/pull, and color-coded categories. | | **Changes** | Git diff viewer with staged/unstaged files, branch info, and worktree context. **Review mode** adds inline annotations on diff lines — the agent reads them directly before marking a spec as verified | -| **Memories** | Browsable observations — decisions, discoveries, bugfixes — with type filters and search | -| **Sessions** | Active and past sessions with observation counts and duration | | **Usage** | Daily token costs, model routing breakdown, and usage trends | -| **Settings** | Model selection per command/sub-agent, spec workflow toggles (worktree, questions, approval), reviewer toggles (spec review, changes review, optional Codex), extended context (1M) toggle | | **Help** | Documentation, guides, and quick-start resources | +| **Settings** | Model selection per command/sub-agent, spec workflow toggles (worktree, questions, approval), reviewer toggles (spec review, changes review, optional Codex), extended context (1M) toggle with pricing info |
@@ -168,130 +307,177 @@ This gives you a final quality gate with direct, line-level feedback — the sam -### Status Line +### Extensions -A three-line dashboard rendered below every Claude Code response. Replaces the default status line with real-time session metrics, spec progress, and version info — all color-coded. +Rules, commands, skills, and agents — all plain markdown files in `.claude/` (project) or `~/.claude/` (global). The Console Extensions page lets you browse, edit, compare, and share everything from one place: -``` -Opus 4.6 [1M] | █████░▓ 60% [604K] | +156 -23 | main +2 ~3 | $1.45 | Savings: 65% -Spec: my-feature feature [implement] ████░░░░ 3/6 -Pilot 8.2.1 (Solo) · CC 2.1.79 (Max) · sessions 2 · memories 12 -``` +Pilot Shell Console — Extensions
-All fields explained - -**Line 1 — Session Metrics** (separated by `|`): +Extension categories -| Widget | Description | -| ----------------- | ------------------------------------------------------------------------------- | -| **Model** | Active model in short form (`Opus 4.6 [1M]`, `Sonnet 4.6`) | -| **Context** | Effective context usage with progress bar, buffer indicator, and token count. Green < 80%, Yellow 80–95%, Red 95%+ | -| **Lines changed** | `+added -removed` in session (hidden when usage API data available) | -| **Git** | Branch with staged (`+N`) / unstaged (`~N`) counts | -| **Cost** | Session cost in USD. Green < $1, Yellow $1–5, Red $5+ | | -| **RTK savings** | Token savings percentage from RTK proxy (`Savings: N%`), shown when no usage data | +| Extension | Location | When it loads | +| ------------ | ------------------- | ------------------------------------------- | +| **Skills** | `.claude/skills/` | Automatically when relevant | +| **Rules** | `.claude/rules/` | Every session, or by file type | +| **Commands** | `.claude/commands/` | On demand via `/command-name` | +| **Agents** | `.claude/agents/` | Spawned as sub-agents for specialized tasks | -**Line 2 — Mode:** +Use `/setup-rules` to auto-generate rules from your codebase. Use `/create-skill` to capture workflows as reusable skills. -- **Quick Mode:** `Quick Mode` -- **Spec Mode:** Plan name, type (`feature`/`bugfix`), phase (`plan`/`implement`/`verify`), progress bar, task count, and iteration count +
-**Line 3 — Version & Session Info:** +
+Scopes: Global, Project, Plugin -`Pilot () · CC () · sessions · memories ` +**Project** extensions live in `.claude/` — commit them so teammates get them on `git clone`. **Global** extensions live in `~/.claude/` — personal and available across all projects. Move extensions between scopes with one click. -Pilot tier: Solo, Team, or Trial with time remaining. Claude subscription (Pro/Max/Team/Enterprise) detected via `claude auth status` and cached for 24 hours. +**Plugin** extensions come from installed Claude Code plugins (`claude plugin install `). They appear as read-only items — visible but not editable.
-### /prd — Generate Product Requirements Documents +
+Team sharing & APM (Team tier) -**Use `/prd` before `/spec` when requirements are unclear.** It's a strategic thought partner that turns vague ideas into concrete Product Requirements Documents (PRDs) through one-on-one conversation — with optional research, challenging assumptions, exploring trade-offs, and defining scope before you commit to building. +Connect a git repository to share extensions across your team: -```bash -pilot -> /prd "Add real-time notifications for team updates" -> /prd "We need better onboarding — users drop off after signup" -``` +- **Push** local extensions to the team remote +- **Pull** remote extensions to your machine (global or project scope) +- **Compare** local vs remote with a built-in side-by-side diff view +- **Conflict detection** — when local and remote differ, choose which version to keep -Choose a research tier at the start: **Quick** (skip), **Standard** (web search for competitors, prior art, best practices), or **Deep** (parallel research agents for comprehensive findings). The conversation produces a PRD with problem statement, core user flows, scope boundaries, and technical context — then offers to hand off directly to `/spec` for implementation. PRDs are saved to `docs/prd/` and visible in the Console's **Requirements** tab. +**APM format** — check one box and your remote becomes an [APM package](https://microsoft.github.io/apm/introduction/key-concepts/), directly installable via `apm install owner/repo` by anyone using Copilot, Cursor, OpenCode, or Claude. Extensions are automatically converted to APM conventions on push: -### /spec — Spec-Driven Development +| Pilot Shell | APM Remote | +| --- | --- | +| `rules/my-rule.md` | `instructions/my-rule.instructions.md` | +| `commands/my-cmd.md` | `prompts/my-cmd.prompt.md` | +| `skills/my-skill/SKILL.md` | `skills/my-skill/SKILL.md` | +| `agents/my-agent.md` | `agents/my-agent.agent.md` | + +APM-compatible frontmatter is injected automatically. 
An `apm.yml` manifest is generated. Toggling APM on/off migrates existing extensions in a single commit. + +
-**`/spec` replaces Claude Code's built-in plan mode** (Shift+Tab). It provides a complete planning workflow with TDD, verification, and code review — use `/spec` instead of plan mode for all planned work. +### Customization -Features, bug fixes, refactoring — describe it and `/spec` handles the rest. Auto-detects whether it's a feature or a bugfix and adapts the workflow. Specs are saved to `docs/plans/` and visible in the Console's **Specification** tab. +Customize everything Pilot auto-installs — tweak the built-in `/spec` workflow, modify existing rules, register additional hooks, add agents, and adjust the auto-applied `settings.json`, `claude.json`, `.mcp.json`, and `.lsp.json`. Source can be a **git repo** (team-wide) or a **local directory** (personal, no git needed). Team and Enterprise plans. ```bash -pilot -> /spec "Add user authentication with OAuth and JWT tokens" # → feature mode -> /spec "Fix the crash when deleting nodes with two children" # → bugfix mode (auto-detected) +pilot customize install # Install and apply +pilot customize update # Pull latest (git) or re-read (local) +pilot customize status # Active source + drift warnings +pilot customize remove # Restore Pilot defaults ``` -``` -Discuss → Plan → Approve → Implement (TDD) → Verify → Done - ↑ ↓ - └── Loop──┘ -``` +
+What you can customize -Pilot Shell Console — Specifications +| Target | How | +|--------|-----| +| **Skills** (built-in workflows like `/spec`, `/prd`) | Overlay ops in `customization.json`: `insert_after`, `insert_before`, `replace`, `disable` — or ship an entirely new skill folder | +| **Rules** | New rules are additive; same filename as a core rule overrides it | +| **Hooks** | Scripts copy to `~/.claude/pilot/hooks/`; ship `hooks.json` to register them alongside Pilot's built-ins | +| **Agents** | Drop `.md` files to add review/helper agents into the spec workflow | +| **MCP servers** | Top-level `.mcp.json` replaces the auto-configured server list | +| **LSP servers** | Top-level `.lsp.json` replaces the auto-configured server list | +| **Claude settings** | Top-level `settings.json` and `claude.json` deep-merge into the user's files — model prefs, permissions, env vars, etc. User state (oauth, projects) is preserved | -
-Feature Mode +Replaced skill fragments stay pinned to upstream by hash. `pilot customize status` surfaces drift when Pilot upgrades a replaced step; `pilot customize diff /` shows what changed so you can port improvements. See the [Customization guide](https://pilot-shell.com/docs/features/customization) for the full schema. -Full exploration workflow for new functionality, refactoring, or architectural changes. +
-**Plan:** Explores codebase with semantic search → asks clarifying questions → writes detailed spec with scope, tasks, and definition of done → for UI features, writes **E2E test scenarios** (step-by-step, browser-executable) that become the verification contract → **spec-review sub-agent** validates completeness → waits for your approval. Optional **Codex adversarial review** provides an independent second opinion when enabled. +
+Repository / folder structure -**Implement:** Creates an isolated git worktree → implements each task with strict TDD (RED → GREEN → REFACTOR) → quality hooks auto-lint, format, and type-check every edit → full test suite after each task. +The layout is the same whether you publish it as a git repo or keep it as a local folder. Directory names map 1:1 to `~/.claude/`: -**Verify:** Full test suite + actual program execution → **unified review sub-agent** (compliance + quality + goal) → for UI features, executes each E2E scenario step-by-step via browser automation (pass/fail tracked, results written to plan) → auto-fixes findings → squash merges to main on success. +``` +my-customization/ +├── customization.json # Required: metadata + optional skill overlays +├── settings.json # Optional: deep-merges into ~/.claude/settings.json +├── claude.json # Optional: deep-merges into ~/.claude.json +├── .mcp.json # Optional: replaces ~/.claude/pilot/.mcp.json +├── .lsp.json # Optional: replaces ~/.claude/pilot/.lsp.json +├── skills/ # → ~/.claude/skills/ +│ ├── spec-plan/steps/ +│ │ └── security-review.md # New step injected into spec-plan +│ └── team-deploy/ # Brand-new skill +│ ├── manifest.json +│ ├── orchestrator.md +│ └── steps/01-stage.md +├── rules/ # → ~/.claude/rules/ +│ ├── team-standards.md # Additive +│ └── testing.md # Overrides core (same filename) +├── hooks/ # → ~/.claude/pilot/hooks/ +│ ├── team-lint-check.sh +│ └── hooks.json # Registers team-lint-check.sh alongside Pilot's hooks +└── agents/ # → ~/.claude/pilot/agents/ + └── team-reviewer.md +``` + +Only ship the files and directories you need. A repo with just `rules/` is a valid customization; so is one with just `.mcp.json`.
-
-Bugfix Mode +### Pilot Bot -Investigation-first workflow for targeted fixes. Finds the root cause before touching any code. +Run Claude Code as a persistent 24/7 automation agent with scheduled tasks, background jobs, and heartbeat monitoring: -**Investigate:** Reproduces the bug → traces backward through the call chain to find the **root cause** at a specific `file:line` → compares against working code patterns → states the fix with confidence level. If 3+ hypotheses fail, escalates as an architectural problem. +```bash +pilot bot # Launch automation session (auto-initializes on first run) +``` -**Test-Before-Fix:** Writes a regression test that FAILS on current code → implements the minimal fix at the root cause → verifies all tests pass. Defense-in-depth validation at multiple layers when the bug involves data flowing through shared code paths. +Pilot Bot defines scheduled jobs, automates recurring tasks, and monitors system health around the clock. If the [Telegram Channels plugin](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) is installed, the bot auto-detects it and enables bidirectional messaging. Similar to OpenClaw, but without the added complexity and costs. -**Verify:** Lightweight verification — regression test confirmation → full test suite → lint + type check → quality checks. No review sub-agents — the regression test proves the fix works, the full suite proves nothing else broke. +
+Bot skills -**Why this matters:** Root cause investigation prevents "fix one thing, break another." The regression test locks in the fix. No formal notation overhead — just trace, test, fix, verify. +| Skill | Purpose | +|-------|---------| +| `bot-boot` | Boot sequence — health check, job registration, heartbeat setup | +| `bot-heartbeat` | Periodic health checks, notifies only when issues are detected | +| `bot-jobs` | Manage scheduled jobs — add, remove, pause, resume, edit, list | +| `bot-channel-task` | Channel task flow — acknowledge, execute, report (when Telegram is available) | +| `bot-defaults` | Standard bot behaviors (dedup, reporting, error handling) |
-### Quick Mode +### /prd — Brainstorm Ideas Into Product Requirements Documents -Just chat — no plan, no approval gate. Quality hooks and TDD enforcement still apply. Best for small tasks and exploration. For anything that needs a plan, use `/spec` — not Claude Code's built-in plan mode. - -### Claude CLI Flag Passthrough - -All Claude Code CLI flags work directly with `pilot` — current and future. Pilot forwards any flag it doesn't recognize to the Claude CLI automatically. +[`/prd`](https://pilot-shell.com/docs/workflows/prd) is the brainstorming surface for ideas that aren't specs yet — vague problem statements and fuzzy shapes. It pitches directions, pressure-tests them with you, and converges on a PRD you can hand to `/spec`. PRDs are saved to `docs/prd/` and visible in the Console's **Requirements** tab. ```bash -pilot --channels plugin:telegram@claude-plugins-official -pilot --model opus --verbose -pilot --resume +pilot +> /prd "Add real-time notifications for team updates" +> /prd "We need better onboarding — users drop off after signup" ``` -### Headless Mode +
+What /prd Does -Run Pilot non-interactively with `-p` for CI/CD pipelines, scripts, and automated workflows. All Claude Code CLI flags work — `--output-format`, `--allowedTools`, `--channels`, `--continue`, `--bare`, etc. +**When to use `/prd` over `/spec`:** `/prd` is for **what** and **why**; `/spec` is for **how**. Reach for `/prd` first when you only have a problem statement, want to riff across multiple directions, or need scope boundaries defined before someone starts building. -```bash -pilot -p "Run tests and fix failures" --allowedTools "Bash,Read,Edit" -pilot -p "Summarize this project" --output-format json -pilot --channels plugin:telegram@official -p "Check messages" -``` +**Flow:** two modes, picked automatically from how fuzzy the idea is: + +1. **Ideate** — free-form prose, Claude pitches 3-5 directions, you react (only runs when the idea is vague) +2. **Clarify → Converge → Write** — structured multiple-choice questions once the shape is known, then the PRD is written + +**Research tiers** (picked at the start): + +| Tier | Behavior | +|------|----------| +| **Quick** | Skip research | +| **Standard** | Web search for competitors, prior art, best practices | +| **Deep** | Parallel research agents for comprehensive findings | + +The final PRD covers problem statement, core user flows, scope boundaries, and technical context — then offers to hand off directly to `/spec` for implementation. + +
### /setup-rules — Generate Modular Rules -Explores your codebase, discovers conventions, generates modular rules and documents MCP servers. Run once initially, then anytime your project changes significantly. +[`/setup-rules`](https://pilot-shell.com/docs/workflows/setup-rules) explores your codebase, discovers conventions, generates modular rules, and documents MCP servers. Run once initially, then anytime your project changes significantly. ```bash pilot @@ -301,19 +487,20 @@ pilot
What /setup-rules Does -11 phases that read your codebase and produce comprehensive AI context: +12 phases that read your codebase and produce comprehensive AI context: 0. **Reference** — load best practices for rule structure, path-scoping, and quality standards -1. **Read existing rules** — inventory all `.claude/rules/` files, detect structure and path-scoping +1. **Read existing rules** — inventory all `.claude/rules/` files, detect structure and path-scoping. Also detects `CLAUDE.md` and `AGENTS.md` (the cross-framework agent context file used by Codex, Cursor, etc.) 2. **Migrate unscoped assets** — prefix with project slug for better sharing 3. **Quality audit** — check rules against best practices (size, specificity, stale references, conflicts) 4. **Explore codebase** — semantic search with Probe CLI, structural analysis with CodeGraph 5. **Compare patterns** — discovered vs documented conventions -6. **Sync project rule** — update `{slug}-project.md` with current tech stack, structure, commands +6. **Sync project rule** — update `{slug}-project.md` with current tech stack, structure, commands. Migrates `CLAUDE.md` / `AGENTS.md` content into modular rules 7. **Sync MCP docs** — smoke-test user MCP servers, document working tools 8. **Discover new rules** — find undocumented patterns worth capturing 9. **Cross-check** — validate all references, ensure consistency across generated files -10. **Summary** — report all changes made +10. **Sync AGENTS.md** — if `AGENTS.md` already exists, offer to re-export the updated rules into it so non-Claude agents see the same context. Always asks first, never creates the file if absent, preserves user-authored sections +11. **Summary** — report all changes made **For monorepos:** Organizes rules in nested subdirectories by product and team, with `paths` frontmatter to scope rules to specific file types. Generates a `README.md` documenting the structure. 
@@ -321,7 +508,7 @@ pilot ### /create-skill — Reusable Skill Creator -Builds a reusable skill from any topic — explores the codebase and creates it interactively with you. If no topic is given, evaluates the current session for extractable knowledge. +[`/create-skill`](https://pilot-shell.com/docs/workflows/create-skill) builds a reusable skill from any topic — explores the codebase and creates it interactively with you. If no topic is given, evaluates the current session for extractable knowledge. ```bash pilot @@ -352,57 +539,60 @@ pilot
-### Extensions +### /benchmark — Measure Rule & Skill Impact -Rules, commands, skills, and agents — all plain markdown files in `.claude/` (project) or `~/.claude/` (global). The Console Extensions page lets you browse, edit, compare, and share everything from one place. Team sharing supports [APM](https://github.com/microsoft/apm) format for cross-tool compatibility. +[`/benchmark`](https://pilot-shell.com/docs/workflows/benchmark) runs your prompts with and without the target, grades outputs against falsifiable assertions, and shows a structured report you can absorb in 30 seconds — labeled verdict, quadrant breakdown, and only the divergent assertions in the drill-down. Finishes with a concrete improvement plan so you know exactly what to change next. -Pilot Shell Console — Extensions +```bash +pilot +> /benchmark pilot/skills/create-skill +> /benchmark pilot/rules/testing.md +```
-Extension categories +What /benchmark Does -| Extension | Location | When it loads | -| ------------ | ------------------- | ------------------------------------------- | -| **Skills** | `.claude/skills/` | Automatically when relevant | -| **Rules** | `.claude/rules/` | Every session, or by file type | -| **Commands** | `.claude/commands/` | On demand via `/command-name` | -| **Agents** | `.claude/agents/` | Spawned as sub-agents for specialized tasks | +Six phases turn a rule or skill into a before/after comparison with an actionable plan: -Use `/setup-rules` to auto-generate rules from your codebase. Use `/create-skill` to capture workflows as reusable skills. +1. **Intake** — pick up an existing `benchmarks//evals.json` or author one +2. **Target discovery** — classify as `skill` or `rules` +3. **Author evals** — draft 3 falsifiable assertions; falsifiability gate ensures baseline actually fails +4. **Execute** — run both configs in isolated sandboxes; grader subagent scores every assertion +5. **Present findings** — three layers, scannable top-to-bottom: -
+ | Layer | Content | + |---|---| + | **Verdict** | One labeled sentence with a recommended next step. Delta bands: 🟢 Strong (≥ +0.50) / 🟢 Moderate (+0.20) / 🟡 Weak (+0.05) / ⚪ Indistinguishable (±0.05) / 🔴 Regression (< −0.05) | + | **Quadrant breakdown** | Counts each assertion as Signal (✓/✗) / Baseline (✓/✓) / Unreachable (✗/✗) / Regression (✗/✓). The dominant quadrant drives the plan | + | **Per-eval drill-down** | Only divergent assertions get a row; matching ones fold into header counts so the report stays under one screen | -
-Scopes: Global, Project, Plugin +6. **Improvement plan** — ≤ 5 ranked proposals in a uniform format (`[TARGET]` or `[EVALS]` tag, location, current quote, replacement, "Lever" line). You pick: apply target edits, iterate on evals, both, or save the plan and stop. Re-runs land in a fresh `runs//` so iteration deltas stay legible. -**Project** extensions live in `.claude/` — commit them so teammates get them on `git clone`. **Global** extensions live in `~/.claude/` — personal and available across all projects. Move extensions between scopes with one click. +**Isolation:** each run gets its own sandbox directory; a globally installed copy of the target in `~/.claude/` is auto-hidden for the duration and restored afterward (with an on-disk recovery manifest covering SIGKILL / power loss / segfault). Conditional-loading frontmatter (`path:` / `paths:`) is stripped from the copy installed into the `with` sandbox so the target loads unconditionally for every prompt — without that, rules scoped to e.g. `paths: ["**/*.py"]` would stay dormant in both configs and the delta would collapse to 0.00. The source file is never modified. -**Plugin** extensions come from installed Claude Code plugins (`claude plugin install `). They appear as read-only items — visible but not editable. +**Key flags:** `--runs N` (default 1), `--configs with,without`, `--workers N`, `--model`, `--no-isolate-global`, `--restore-hidden`.
-
-Team sharing & APM (Team tier) - -Connect a git repository to share extensions across your team: +### Claude CLI Flag Passthrough -- **Push** local extensions to the team remote -- **Pull** remote extensions to your machine (global or project scope) -- **Compare** local vs remote with a built-in side-by-side diff view -- **Conflict detection** — when local and remote differ, choose which version to keep +All Claude Code CLI flags work directly with `pilot` — current and future. Pilot forwards any flag it doesn't recognize to the Claude CLI automatically. -**APM format** — check one box and your remote becomes an [APM package](https://microsoft.github.io/apm/introduction/key-concepts/), directly installable via `apm install owner/repo` by anyone using Copilot, Cursor, OpenCode, or Claude. Extensions are automatically converted to APM conventions on push: +```bash +pilot --channels plugin:telegram@claude-plugins-official +pilot --model opus --verbose +pilot --resume +``` -| Pilot Shell | APM Remote | -| --- | --- | -| `rules/my-rule.md` | `instructions/my-rule.instructions.md` | -| `commands/my-cmd.md` | `prompts/my-cmd.prompt.md` | -| `skills/my-skill/SKILL.md` | `skills/my-skill/SKILL.md` | -| `agents/my-agent.md` | `agents/my-agent.agent.md` | +### Headless Mode -APM-compatible frontmatter is injected automatically. An `apm.yml` manifest is generated. Toggling APM on/off migrates existing extensions in a single commit. +Run Pilot non-interactively with `-p` for CI/CD pipelines, scripts, and automated workflows. All Claude Code CLI flags work — `--output-format`, `--allowedTools`, `--channels`, `--continue`, `--bare`, etc. -
+```bash +pilot -p "Run tests and fix failures" --allowedTools "Bash,Read,Edit" +pilot -p "Summarize this project" --output-format json +pilot --channels plugin:telegram@official -p "Check messages" +``` --- @@ -418,25 +608,30 @@ For full details on every component, see the **[Documentation](https://pilot-she | Component | What it does | | --- | --- | -| [**Hooks Pipeline**](https://pilot-shell.com/docs/features/hooks) | 15 hooks across 7 events — quality checks on every file edit (ruff, ESLint, go vet), TDD enforcement, token optimization via RTK (60–90% savings), memory capture, and session lifecycle management | -| [**Context Optimization**](https://pilot-shell.com/docs/features/context-optimization) | Lean context strategies — conditional rule loading, progressive skill disclosure, lazy MCP tool loading, RTK output compression. Compaction resilience for 200K windows | -| [**Smart Model Routing**](https://pilot-shell.com/docs/features/model-routing) | Opus for planning, Sonnet for implementation and verification. Configurable per-phase via Console Settings. 1M context available — included with API plans (Team, Enterprise); Max plan requires all models set to Opus | -| [**Rules & Standards**](https://pilot-shell.com/docs/features/rules) | 9 built-in rules (workflow, testing, verification, debugging, tools) + 5 coding standards activated by file type (Python, TypeScript, Go, Frontend, Backend) | -| [**MCP Servers**](https://pilot-shell.com/docs/features/mcp-servers) | 6 servers: library docs, persistent memory, web search, GitHub code search, web page fetching, code knowledge graph | -| [**Language Servers**](https://pilot-shell.com/docs/features/language-servers) | Real-time diagnostics for Python (basedpyright), TypeScript (vtsls), Go (gopls). 
Auto-installed, auto-configured | +| [**Pilot Console**](https://pilot-shell.com/docs/features/console) | Local web dashboard at `localhost:41777` — 10 views (Dashboard, Sessions, Memories, Requirements, Specifications, Extensions, Changes, Usage, Help, Settings). SQLite-backed, nothing leaves your machine | +| [**Pilot Bot**](https://pilot-shell.com/docs/features/bot) | Persistent 24/7 automation agent with scheduled jobs, background tasks, heartbeat monitoring, and optional Telegram integration for bidirectional messaging | +| [**Status Line**](https://pilot-shell.com/docs/features/statusline) | Real-time session dashboard below every response — model, context usage, git status, cost, spec progress, and savings metrics across 3 lines | +| [**Smart Model Routing**](https://pilot-shell.com/docs/features/model-routing) | Opus for planning, Sonnet for implementation and verification. Configurable per-phase via Console Settings, with a Custom… option for pinning explicit Anthropic model IDs (e.g. `claude-opus-4-6`). 1M context available — included with API plans (Team, Enterprise); Max plan requires all models set to Opus | +| [**Rules & Standards**](https://pilot-shell.com/docs/features/rules) | 11 built-in rules for workflow, testing, verification, debugging, code review, documentation sync, tooling, and context protection + 5 coding standards activated by file type (Python, TypeScript, Go, Frontend, Backend) | +| [**Context Optimization**](https://pilot-shell.com/docs/features/context-optimization) | Lean context strategies — context-mode sandbox (large outputs never enter context), RTK output compression, conditional rule loading, progressive skill disclosure, lazy MCP tool loading. 
Compaction resilience for 200K windows | +| [**Remote Control**](https://pilot-shell.com/docs/features/remote-control) | Control Pilot sessions from your phone, tablet, or any browser — send prompts, monitor progress, and receive notifications remotely | +| [**Hooks Pipeline**](https://pilot-shell.com/docs/features/hooks) | 15 hooks across 7 events — quality checks on every file edit (ruff, ESLint, go vet), TDD enforcement, token optimization via RTK (60–90% savings), session continuity, memory capture, and session lifecycle management | +| [**Extensions**](https://pilot-shell.com/docs/features/extensions) | Unified view of skills, rules, commands, and agents across global, project, plugin, and remote scopes. Team sharing via git with push, pull, diff, and APM-compatible export | +| [**Customization**](https://pilot-shell.com/docs/features/customization) | Customize what Pilot auto-installs — tweak built-in skills (spec, prd, etc.), modify rules, register additional hooks, add agents, and adjust auto-applied MCP / LSP / Claude settings. Source is a git repo (team-wide) or local directory (personal). Skill overlays (`insert_after` / `insert_before` / `replace` / `disable`) modify core workflows without full-file forks; fragments stay pinned to upstream by hash with drift detection. Team and Enterprise plans | | [**Pilot CLI**](https://pilot-shell.com/docs/features/cli) | Session management, headless mode (`-p`) for CI/CD and scripts, worktree isolation, licensing, context monitoring. Run `pilot` or `ccp` to start | +| [**MCP Servers**](https://pilot-shell.com/docs/features/mcp-servers) | 6 preconfigured MCP servers for library docs, persistent memory, web search, GitHub code search, web page fetching, and code knowledge graphs, plus the context-mode plugin for sandboxed execution | +| [**Language Servers**](https://pilot-shell.com/docs/features/language-servers) | Real-time diagnostics for Python (basedpyright), TypeScript (vtsls), Go (gopls). 
Auto-installed, auto-configured | +| [**Open Source Tools**](https://pilot-shell.com/docs/features/open-source-tools) | 20+ open-source tools installed alongside Pilot — Probe (semantic search), CodeGraph (code intelligence), RTK (token optimization), context-mode, language servers, and system prerequisites | --- ## What Users Say - - -> "I stopped reviewing every line Claude writes. The hooks catch formatting and type errors automatically, TDD catches logic errors, and the spec verifier catches everything else. I review the plan, approve it, and the output is production-grade." +> "Spec-driven development in Pilot Shell is incredible. I'm so impressed that I have to resist the urge to fix every issue all at once." -> "Other frameworks I tried added so much overhead that half my tokens went to the system itself. Pilot Shell is lean — quick mode has zero scaffolding, and even /spec only adds structure where it matters. More of my context goes to actual work." +> "Instead of just letting Claude Code run on its own, you've managed to make it work in a much more organized, consistent, and reliable way within a workflow, which I think is fantastic. What you've built is truly impressive." -> "The persistent memory changed everything. I can pick up a project after a week and Claude already knows my architecture decisions, the bugs we fixed, and why we chose certain patterns. No more re-explaining the same context every session." +> "I have fallen in love with Pilot and just can't stand the idea of having to go back to native Claude." --- @@ -447,23 +642,9 @@ Pilot Shell is source-available under a commercial license. 
See the [LICENSE](LI | Tier | Seats | Includes | | :------------- | :---- | :-------------------------------------------------------------------------------------------------------------- | | **Solo** | 1 | All features, continuous updates, community support via [GitHub Issues][gh-issues] | -| **Team** | Multi | Solo + extension sharing, seat management, priority support, team onboarding | +| **Team** | Multi | Solo + extension sharing, customization, seat management, priority support | | **Enterprise** | 100+ | Team + full source code access (launcher, console, all components), dedicated support | -A **free 7-day trial** starts automatically on install — full features, no license required. All plans work across multiple personal machines — one subscription, all your devices. - -[gh-issues]: https://github.com/maxritter/pilot-shell/issues - -Details and licensing at [pilot-shell.com](https://pilot-shell.com). - ---- - -## Rolling Out for Your Team? - -I'd love to help figure out if Pilot Shell is the right fit for your team and get everyone set up. For organizations with 100+ developers, the **[Enterprise tier](https://form.typeform.com/to/J7h2jjfw)** includes full source code access. - -**[Book a Call](https://calendly.com/rittermax/pilot-shell)** · **[Enterprise Inquiry](https://form.typeform.com/to/J7h2jjfw)** · **[Send an Email](mailto:mail@maxritter.net)** · **[Connect on LinkedIn](https://www.linkedin.com/in/rittermax/)** - --- ## FAQ @@ -502,7 +683,7 @@ Yes. Pilot Shell enhances Claude Code — it doesn't replace it. You need an act
Does Pilot Shell support AI models beyond Claude? -Pilot Shell is built for Claude Code and uses Anthropic's Claude models (Opus, Sonnet) for all planning, implementation, and verification. Additionally, the optional [Codex plugin](https://github.com/openai/codex-plugin-cc) adds OpenAI-powered adversarial review during `/spec` — an independent second opinion on your plans and code changes. Codex reviewers are disabled by default and can be enabled in Console Settings → Spec Workflow → Codex Reviewers. +Pilot Shell is built for Claude Code and uses Anthropic's Claude models (Opus, Sonnet) for all planning, implementation, and verification. The [Codex plugin](https://github.com/openai/codex-plugin-cc) is included and adds OpenAI-powered adversarial review during `/spec` — an independent second opinion on your plans and code changes. Run `/codex:setup` to authenticate, then enable the reviewers in Console Settings → Reviewers.
@@ -532,7 +713,7 @@ Yes. Pilot Shell installs once globally and works across all your projects — y Pilot Shell sets Claude Code to `bypassPermissions` mode by default so the `/spec` workflow can run autonomously — planning, implementing, and verifying without pausing for permission prompts at every tool call. This is what enables the end-to-end spec-driven development experience. -**In Quick Mode (regular chat), you have full control.** Press `Shift+Tab` at any time to cycle through Claude Code's permission modes: +**In Quick Mode (regular chat), you have full control.** Press `Shift+Tab` at any time to cycle through [Claude Code's permission modes](https://pilot-shell.com/docs/getting-started/permission-modes): | Mode | Behavior | | ---------------- | ----------------------------------------------------- | @@ -553,11 +734,33 @@ For monorepos, organize rules in nested subdirectories by product and team (e.g.
+
+Can I customize Pilot's built-in workflows and defaults? + +Yes — the **Customization** feature on Team and Enterprise plans lets you modify what Pilot Shell auto-installs, not just add alongside it. Tweak the built-in `/spec` workflow (insert a security-review step, replace the planning template, disable a step you don't need), adjust existing rules, register additional hooks, add review agents, change which MCP or LSP servers get configured, and override the auto-applied `settings.json` and `claude.json`. Source is either a **git repo** for your team or a **local directory** for personal use — no git needed for a one-off tweak. + +On **Team**, every developer runs `pilot customize install <source>` once and stays in sync via `pilot customize update`. Skill overlays stay pinned to Pilot's upstream by hash, so when Pilot ships an improvement to a step you replaced, `pilot customize status` flags the drift and `pilot customize diff` shows you what changed. + +**Enterprise** adds full source-code access to Pilot itself (launcher, console, all components) on top of everything Team gets — so you can fork, audit, and modify the entire stack for regulated environments. See the [Customization guide](https://pilot-shell.com/docs/features/customization) for the full schema. + +
+
Can I use Pilot Shell inside a Dev Container? Yes. Copy the `.devcontainer` folder from this repository into your project, adapt it to your needs (base image, extensions, dependencies), and install Pilot Shell inside the container. Everything works the same — hooks, rules, MCP servers, persistent memory, and the Console dashboard all run inside the container. This is a great option for teams that want a consistent, reproducible development environment. +For tighter isolation when working with untrusted code, layer Claude Code's [`/sandbox`](https://code.claude.com/docs/en/sandboxing) on top — the Dockerfile pre-installs `bubblewrap`, `socat`, `iptables`, and `ipset` so it works out of the box. See Anthropic's [development containers](https://code.claude.com/docs/en/devcontainer) and [sandboxing](https://code.claude.com/docs/en/sandboxing) docs for the hardening patterns. + +
+ +
+What's the difference between pilot and pilot bot? + +**`pilot`** is for interactive development — you chat with Claude, use `/spec` for planned work or quick mode for ad-hoc tasks, and drive every session yourself. It's your daily coding tool. + +**`pilot bot`** is for unattended automation — it launches Claude Code as a persistent background agent that runs 24/7. You define scheduled jobs (health checks, deployments, monitoring) and the bot executes them on a cron schedule with heartbeat monitoring. If the [Telegram plugin](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) is installed, you can send tasks to the bot from your phone and receive results back. Think of `pilot` as your IDE and `pilot bot` as your ops assistant. +
--- @@ -570,14 +773,12 @@ See the full changelog at [pilot.openchangelog.com](https://pilot.openchangelog. ## Contributing -**Pull Requests** — New features, improvements, and bug fixes are welcome. You can improve Pilot Shell with Pilot Shell — a self-improving loop where your contributions make the tool that makes contributions better. - -**Bug Reports** — Found a bug? [Open an issue](https://github.com/maxritter/pilot-shell/issues) on GitHub. +Found a bug or missing a feature? [Open an issue](https://github.com/maxritter/pilot-shell/issues) on GitHub. ---
-**Claude Code is powerful. Pilot Shell makes it reliable.** +**The Claude Code Engineering Platform.**
diff --git a/console/package.json b/console/package.json index 4ab9a10e..d3f78dbf 100644 --- a/console/package.json +++ b/console/package.json @@ -1,6 +1,6 @@ { "name": "pilot-console", - "version": "7.11.4", + "version": "8.4.1", "description": "Memory system for Pilot Shell - persistent context and observations", "type": "module", "private": true, @@ -35,8 +35,12 @@ "devDependencies": { "@git-diff-view/file": "^0.1.1", "@git-diff-view/react": "^0.1.1", + "@happy-dom/global-registrator": "^20.9.0", "@iconify/react": "^6.0.2", "@tailwindcss/vite": "^4.1.18", + "@testing-library/dom": "^10.4.1", + "@testing-library/react": "^16.3.2", + "@testing-library/user-event": "^14.6.1", "@types/bun": "^1.3.8", "@types/cookie-parser": "^1.4.10", "@types/cors": "^2.8.19", diff --git a/console/src/cli/handlers/session-init.ts b/console/src/cli/handlers/session-init.ts index 0c43446f..05d1bbff 100644 Binary files a/console/src/cli/handlers/session-init.ts and b/console/src/cli/handlers/session-init.ts differ diff --git a/console/src/cli/handlers/user-message.ts b/console/src/cli/handlers/user-message.ts index fe380d58..40324232 100644 Binary files a/console/src/cli/handlers/user-message.ts and b/console/src/cli/handlers/user-message.ts differ diff --git a/console/src/services/sqlite/notifications/store.ts b/console/src/services/sqlite/notifications/store.ts index c179eb52..d178d1ae 100644 Binary files a/console/src/services/sqlite/notifications/store.ts and b/console/src/services/sqlite/notifications/store.ts differ diff --git a/console/src/services/sync/ChromaSync.ts b/console/src/services/sync/ChromaSync.ts index 6f057843..dc5d4898 100644 Binary files a/console/src/services/sync/ChromaSync.ts and b/console/src/services/sync/ChromaSync.ts differ diff --git a/console/src/services/sync/VectorDbSizeGuard.ts b/console/src/services/sync/VectorDbSizeGuard.ts new file mode 100644 index 00000000..a96f22a1 Binary files /dev/null and b/console/src/services/sync/VectorDbSizeGuard.ts differ 
diff --git a/console/src/services/worker-service.ts b/console/src/services/worker-service.ts index a6e6a1f7..79abcc3c 100644 Binary files a/console/src/services/worker-service.ts and b/console/src/services/worker-service.ts differ diff --git a/console/src/services/worker/DatabaseManager.ts b/console/src/services/worker/DatabaseManager.ts index b10f7f58..43cf7505 100644 Binary files a/console/src/services/worker/DatabaseManager.ts and b/console/src/services/worker/DatabaseManager.ts differ diff --git a/console/src/services/worker/PaginationHelper.ts b/console/src/services/worker/PaginationHelper.ts index d7869bb6..1b0fb8f3 100644 Binary files a/console/src/services/worker/PaginationHelper.ts and b/console/src/services/worker/PaginationHelper.ts differ diff --git a/console/src/services/worker/RetentionScheduler.ts b/console/src/services/worker/RetentionScheduler.ts index 31446e62..4b72dc02 100644 Binary files a/console/src/services/worker/RetentionScheduler.ts and b/console/src/services/worker/RetentionScheduler.ts differ diff --git a/console/src/services/worker/SessionJsonlService.ts b/console/src/services/worker/SessionJsonlService.ts new file mode 100644 index 00000000..79b59cef Binary files /dev/null and b/console/src/services/worker/SessionJsonlService.ts differ diff --git a/console/src/services/worker/http/routes/DataRoutes.ts b/console/src/services/worker/http/routes/DataRoutes.ts index ae268c71..75eecf64 100644 Binary files a/console/src/services/worker/http/routes/DataRoutes.ts and b/console/src/services/worker/http/routes/DataRoutes.ts differ diff --git a/console/src/services/worker/http/routes/ExtensionRoutes.ts b/console/src/services/worker/http/routes/ExtensionRoutes.ts index 4dbb9d0a..5c74eb38 100644 Binary files a/console/src/services/worker/http/routes/ExtensionRoutes.ts and b/console/src/services/worker/http/routes/ExtensionRoutes.ts differ diff --git a/console/src/services/worker/http/routes/LicenseRoutes.ts 
b/console/src/services/worker/http/routes/LicenseRoutes.ts index e982e60d..579d780c 100644 Binary files a/console/src/services/worker/http/routes/LicenseRoutes.ts and b/console/src/services/worker/http/routes/LicenseRoutes.ts differ diff --git a/console/src/services/worker/http/routes/NotificationRoutes.ts b/console/src/services/worker/http/routes/NotificationRoutes.ts index b7914973..425b0f8d 100644 Binary files a/console/src/services/worker/http/routes/NotificationRoutes.ts and b/console/src/services/worker/http/routes/NotificationRoutes.ts differ diff --git a/console/src/services/worker/http/routes/PlanRoutes.ts b/console/src/services/worker/http/routes/PlanRoutes.ts index 3b77ed0e..d5916b61 100644 Binary files a/console/src/services/worker/http/routes/PlanRoutes.ts and b/console/src/services/worker/http/routes/PlanRoutes.ts differ diff --git a/console/src/services/worker/http/routes/RetentionRoutes.ts b/console/src/services/worker/http/routes/RetentionRoutes.ts index 3bf117dd..0b4bae69 100644 Binary files a/console/src/services/worker/http/routes/RetentionRoutes.ts and b/console/src/services/worker/http/routes/RetentionRoutes.ts differ diff --git a/console/src/services/worker/http/routes/SessionRoutes.ts b/console/src/services/worker/http/routes/SessionRoutes.ts index 53ac393d..e5c2ddaf 100644 Binary files a/console/src/services/worker/http/routes/SessionRoutes.ts and b/console/src/services/worker/http/routes/SessionRoutes.ts differ diff --git a/console/src/services/worker/http/routes/SettingsRoutes.ts b/console/src/services/worker/http/routes/SettingsRoutes.ts index 0f2c25cb..40761c7a 100644 Binary files a/console/src/services/worker/http/routes/SettingsRoutes.ts and b/console/src/services/worker/http/routes/SettingsRoutes.ts differ diff --git a/console/src/services/worker/http/routes/ToolSavingsRoutes.ts b/console/src/services/worker/http/routes/ToolSavingsRoutes.ts new file mode 100644 index 00000000..d6982e56 Binary files /dev/null and 
b/console/src/services/worker/http/routes/ToolSavingsRoutes.ts differ diff --git a/console/src/services/worker/http/routes/UsageRoutes.ts b/console/src/services/worker/http/routes/UsageRoutes.ts index da7f40fc..f2d6a544 100644 Binary files a/console/src/services/worker/http/routes/UsageRoutes.ts and b/console/src/services/worker/http/routes/UsageRoutes.ts differ diff --git a/console/src/services/worker/http/routes/utils/planFileReader.ts b/console/src/services/worker/http/routes/utils/planFileReader.ts index cdc5cc14..454ec8d9 100644 Binary files a/console/src/services/worker/http/routes/utils/planFileReader.ts and b/console/src/services/worker/http/routes/utils/planFileReader.ts differ diff --git a/console/src/services/worker/http/routes/utils/resolveProjectRoot.ts b/console/src/services/worker/http/routes/utils/resolveProjectRoot.ts index 298b88f5..d4b3e1dc 100644 Binary files a/console/src/services/worker/http/routes/utils/resolveProjectRoot.ts and b/console/src/services/worker/http/routes/utils/resolveProjectRoot.ts differ diff --git a/console/src/services/worker/usage/aggregator.ts b/console/src/services/worker/usage/aggregator.ts new file mode 100644 index 00000000..1122f797 Binary files /dev/null and b/console/src/services/worker/usage/aggregator.ts differ diff --git a/console/src/services/worker/usage/bash-utils.ts b/console/src/services/worker/usage/bash-utils.ts new file mode 100644 index 00000000..a0b37c74 Binary files /dev/null and b/console/src/services/worker/usage/bash-utils.ts differ diff --git a/console/src/services/worker/usage/classifier.ts b/console/src/services/worker/usage/classifier.ts new file mode 100644 index 00000000..9827a8bc Binary files /dev/null and b/console/src/services/worker/usage/classifier.ts differ diff --git a/console/src/services/worker/usage/claude-parser.ts b/console/src/services/worker/usage/claude-parser.ts new file mode 100644 index 00000000..a496900a Binary files /dev/null and 
b/console/src/services/worker/usage/claude-parser.ts differ diff --git a/console/src/services/worker/usage/fs-utils.ts b/console/src/services/worker/usage/fs-utils.ts new file mode 100644 index 00000000..16d10450 Binary files /dev/null and b/console/src/services/worker/usage/fs-utils.ts differ diff --git a/console/src/services/worker/usage/plan.ts b/console/src/services/worker/usage/plan.ts new file mode 100644 index 00000000..ecd3b767 Binary files /dev/null and b/console/src/services/worker/usage/plan.ts differ diff --git a/console/src/services/worker/usage/pricing.ts b/console/src/services/worker/usage/pricing.ts new file mode 100644 index 00000000..8210af88 Binary files /dev/null and b/console/src/services/worker/usage/pricing.ts differ diff --git a/console/src/services/worker/usage/types.ts b/console/src/services/worker/usage/types.ts new file mode 100644 index 00000000..ec28a411 Binary files /dev/null and b/console/src/services/worker/usage/types.ts differ diff --git a/console/src/services/worker/usage/yield.ts b/console/src/services/worker/usage/yield.ts new file mode 100644 index 00000000..ffcebde2 Binary files /dev/null and b/console/src/services/worker/usage/yield.ts differ diff --git a/console/src/shared/SettingsDefaultsManager.ts b/console/src/shared/SettingsDefaultsManager.ts index 97f8ce17..79b1142f 100644 Binary files a/console/src/shared/SettingsDefaultsManager.ts and b/console/src/shared/SettingsDefaultsManager.ts differ diff --git a/console/src/shared/hook-constants.ts b/console/src/shared/hook-constants.ts index f944a305..cc96e1db 100644 Binary files a/console/src/shared/hook-constants.ts and b/console/src/shared/hook-constants.ts differ diff --git a/console/src/shared/paths.ts b/console/src/shared/paths.ts index 7e839f02..28bedc9d 100644 Binary files a/console/src/shared/paths.ts and b/console/src/shared/paths.ts differ diff --git a/console/src/shared/sharing/types.ts b/console/src/shared/sharing/types.ts index 5f5c6d3f..dc7fc550 100644 Binary 
files a/console/src/shared/sharing/types.ts and b/console/src/shared/sharing/types.ts differ diff --git a/console/src/ui/viewer/components/NotificationBell.tsx b/console/src/ui/viewer/components/NotificationBell.tsx index 23303444..edbbf8c6 100644 Binary files a/console/src/ui/viewer/components/NotificationBell.tsx and b/console/src/ui/viewer/components/NotificationBell.tsx differ diff --git a/console/src/ui/viewer/components/ProjectFilter.tsx b/console/src/ui/viewer/components/ProjectFilter.tsx new file mode 100644 index 00000000..f2d05589 Binary files /dev/null and b/console/src/ui/viewer/components/ProjectFilter.tsx differ diff --git a/console/src/ui/viewer/components/ui/Modal.tsx b/console/src/ui/viewer/components/ui/Modal.tsx index f19e9086..b9130ec5 100644 Binary files a/console/src/ui/viewer/components/ui/Modal.tsx and b/console/src/ui/viewer/components/ui/Modal.tsx differ diff --git a/console/src/ui/viewer/hooks/useExtensions.ts b/console/src/ui/viewer/hooks/useExtensions.ts index 6ba3b326..10e5d4c1 100644 Binary files a/console/src/ui/viewer/hooks/useExtensions.ts and b/console/src/ui/viewer/hooks/useExtensions.ts differ diff --git a/console/src/ui/viewer/hooks/useNotifications.ts b/console/src/ui/viewer/hooks/useNotifications.ts index deb1f54f..873b3f17 100644 Binary files a/console/src/ui/viewer/hooks/useNotifications.ts and b/console/src/ui/viewer/hooks/useNotifications.ts differ diff --git a/console/src/ui/viewer/hooks/usePlan.ts b/console/src/ui/viewer/hooks/usePlan.ts new file mode 100644 index 00000000..5042b149 Binary files /dev/null and b/console/src/ui/viewer/hooks/usePlan.ts differ diff --git a/console/src/ui/viewer/hooks/useRouter.ts b/console/src/ui/viewer/hooks/useRouter.ts index eebbcba5..17ec3266 100644 Binary files a/console/src/ui/viewer/hooks/useRouter.ts and b/console/src/ui/viewer/hooks/useRouter.ts differ diff --git a/console/src/ui/viewer/hooks/useSettings.ts b/console/src/ui/viewer/hooks/useSettings.ts index 1dab9af4..536bed4c 
100644 Binary files a/console/src/ui/viewer/hooks/useSettings.ts and b/console/src/ui/viewer/hooks/useSettings.ts differ diff --git a/console/src/ui/viewer/hooks/useStats.ts b/console/src/ui/viewer/hooks/useStats.ts index 6948dc90..4e90ad74 100644 Binary files a/console/src/ui/viewer/hooks/useStats.ts and b/console/src/ui/viewer/hooks/useStats.ts differ diff --git a/console/src/ui/viewer/hooks/useToolSavings.ts b/console/src/ui/viewer/hooks/useToolSavings.ts new file mode 100644 index 00000000..7629a194 Binary files /dev/null and b/console/src/ui/viewer/hooks/useToolSavings.ts differ diff --git a/console/src/ui/viewer/hooks/useUsage.ts b/console/src/ui/viewer/hooks/useUsage.ts index 322a931b..acf6aebf 100644 Binary files a/console/src/ui/viewer/hooks/useUsage.ts and b/console/src/ui/viewer/hooks/useUsage.ts differ diff --git a/console/src/ui/viewer/hooks/useYield.ts b/console/src/ui/viewer/hooks/useYield.ts new file mode 100644 index 00000000..57982bf2 Binary files /dev/null and b/console/src/ui/viewer/hooks/useYield.ts differ diff --git a/console/src/ui/viewer/layouts/Sidebar/SidebarFooter.tsx b/console/src/ui/viewer/layouts/Sidebar/SidebarFooter.tsx index 40d70418..97baf23d 100644 Binary files a/console/src/ui/viewer/layouts/Sidebar/SidebarFooter.tsx and b/console/src/ui/viewer/layouts/Sidebar/SidebarFooter.tsx differ diff --git a/console/src/ui/viewer/layouts/Sidebar/SidebarNav.tsx b/console/src/ui/viewer/layouts/Sidebar/SidebarNav.tsx index 4cd64d93..4b4f0eaf 100644 Binary files a/console/src/ui/viewer/layouts/Sidebar/SidebarNav.tsx and b/console/src/ui/viewer/layouts/Sidebar/SidebarNav.tsx differ diff --git a/console/src/ui/viewer/layouts/Sidebar/SidebarProjectSelector.tsx b/console/src/ui/viewer/layouts/Sidebar/SidebarProjectSelector.tsx index a1121ebc..31d9db10 100644 Binary files a/console/src/ui/viewer/layouts/Sidebar/SidebarProjectSelector.tsx and b/console/src/ui/viewer/layouts/Sidebar/SidebarProjectSelector.tsx differ diff --git 
a/console/src/ui/viewer/layouts/Sidebar/index.tsx b/console/src/ui/viewer/layouts/Sidebar/index.tsx index 900d65f5..590bf6aa 100644 Binary files a/console/src/ui/viewer/layouts/Sidebar/index.tsx and b/console/src/ui/viewer/layouts/Sidebar/index.tsx differ diff --git a/console/src/ui/viewer/layouts/Topbar/TopbarActions.tsx b/console/src/ui/viewer/layouts/Topbar/TopbarActions.tsx index a85e14fc..9526124c 100644 Binary files a/console/src/ui/viewer/layouts/Topbar/TopbarActions.tsx and b/console/src/ui/viewer/layouts/Topbar/TopbarActions.tsx differ diff --git a/console/src/ui/viewer/layouts/Topbar/TopbarSpecs.tsx b/console/src/ui/viewer/layouts/Topbar/TopbarSpecs.tsx new file mode 100644 index 00000000..2960270d Binary files /dev/null and b/console/src/ui/viewer/layouts/Topbar/TopbarSpecs.tsx differ diff --git a/console/src/ui/viewer/layouts/Topbar/index.tsx b/console/src/ui/viewer/layouts/Topbar/index.tsx index 71ba7602..1ded4082 100644 Binary files a/console/src/ui/viewer/layouts/Topbar/index.tsx and b/console/src/ui/viewer/layouts/Topbar/index.tsx differ diff --git a/console/src/ui/viewer/views/Changes/DiffPanel.tsx b/console/src/ui/viewer/views/Changes/DiffPanel.tsx index 91948bb1..8cc1a715 100644 Binary files a/console/src/ui/viewer/views/Changes/DiffPanel.tsx and b/console/src/ui/viewer/views/Changes/DiffPanel.tsx differ diff --git a/console/src/ui/viewer/views/Changes/FileList.tsx b/console/src/ui/viewer/views/Changes/FileList.tsx index 57963ca9..45af03f1 100644 Binary files a/console/src/ui/viewer/views/Changes/FileList.tsx and b/console/src/ui/viewer/views/Changes/FileList.tsx differ diff --git a/console/src/ui/viewer/views/Changes/index.tsx b/console/src/ui/viewer/views/Changes/index.tsx index 5525e5d4..6b11209a 100644 Binary files a/console/src/ui/viewer/views/Changes/index.tsx and b/console/src/ui/viewer/views/Changes/index.tsx differ diff --git a/console/src/ui/viewer/views/Changes/review/CodeReviewPanel.tsx 
b/console/src/ui/viewer/views/Changes/review/CodeReviewPanel.tsx index 381a6f8b..94094491 100644 Binary files a/console/src/ui/viewer/views/Changes/review/CodeReviewPanel.tsx and b/console/src/ui/viewer/views/Changes/review/CodeReviewPanel.tsx differ diff --git a/console/src/ui/viewer/views/Changes/useSpecCorrelation.ts b/console/src/ui/viewer/views/Changes/useSpecCorrelation.ts index 9ac79d55..b79de7ef 100644 Binary files a/console/src/ui/viewer/views/Changes/useSpecCorrelation.ts and b/console/src/ui/viewer/views/Changes/useSpecCorrelation.ts differ diff --git a/console/src/ui/viewer/views/Dashboard/PlanStatus.tsx b/console/src/ui/viewer/views/Dashboard/PlanStatus.tsx index 7efc7311..1311442d 100644 Binary files a/console/src/ui/viewer/views/Dashboard/PlanStatus.tsx and b/console/src/ui/viewer/views/Dashboard/PlanStatus.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/RecentActivity.tsx b/console/src/ui/viewer/views/Dashboard/RecentActivity.tsx index 3259ff3f..b2756b70 100644 Binary files a/console/src/ui/viewer/views/Dashboard/RecentActivity.tsx and b/console/src/ui/viewer/views/Dashboard/RecentActivity.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/RecentRequirements.tsx b/console/src/ui/viewer/views/Dashboard/RecentRequirements.tsx new file mode 100644 index 00000000..ce503228 Binary files /dev/null and b/console/src/ui/viewer/views/Dashboard/RecentRequirements.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/RecentSessions.tsx b/console/src/ui/viewer/views/Dashboard/RecentSessions.tsx index b0d6ae4e..ff1ce2e6 100644 Binary files a/console/src/ui/viewer/views/Dashboard/RecentSessions.tsx and b/console/src/ui/viewer/views/Dashboard/RecentSessions.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/RecentSpecs.tsx b/console/src/ui/viewer/views/Dashboard/RecentSpecs.tsx new file mode 100644 index 00000000..9761d169 Binary files /dev/null and b/console/src/ui/viewer/views/Dashboard/RecentSpecs.tsx differ diff 
--git a/console/src/ui/viewer/views/Dashboard/StatsCard.tsx b/console/src/ui/viewer/views/Dashboard/StatsCard.tsx index c32497b3..70783997 100644 Binary files a/console/src/ui/viewer/views/Dashboard/StatsCard.tsx and b/console/src/ui/viewer/views/Dashboard/StatsCard.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/StatsGrid.tsx b/console/src/ui/viewer/views/Dashboard/StatsGrid.tsx index 1c961512..fdcafb05 100644 Binary files a/console/src/ui/viewer/views/Dashboard/StatsGrid.tsx and b/console/src/ui/viewer/views/Dashboard/StatsGrid.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/UsageSummary.tsx b/console/src/ui/viewer/views/Dashboard/UsageSummary.tsx index bbfeaae6..67409e66 100644 Binary files a/console/src/ui/viewer/views/Dashboard/UsageSummary.tsx and b/console/src/ui/viewer/views/Dashboard/UsageSummary.tsx differ diff --git a/console/src/ui/viewer/views/Dashboard/index.tsx b/console/src/ui/viewer/views/Dashboard/index.tsx index 4a5c8a8d..cd77c279 100644 Binary files a/console/src/ui/viewer/views/Dashboard/index.tsx and b/console/src/ui/viewer/views/Dashboard/index.tsx differ diff --git a/console/src/ui/viewer/views/Extensions/ExtensionsView.tsx b/console/src/ui/viewer/views/Extensions/ExtensionsView.tsx index c1234afd..024e4776 100644 Binary files a/console/src/ui/viewer/views/Extensions/ExtensionsView.tsx and b/console/src/ui/viewer/views/Extensions/ExtensionsView.tsx differ diff --git a/console/src/ui/viewer/views/Help/index.tsx b/console/src/ui/viewer/views/Help/index.tsx index fe259d77..4771f3d9 100644 Binary files a/console/src/ui/viewer/views/Help/index.tsx and b/console/src/ui/viewer/views/Help/index.tsx differ diff --git a/console/src/ui/viewer/views/Memories/MemoryCard.tsx b/console/src/ui/viewer/views/Memories/MemoryCard.tsx index 06fc2765..ee1b8048 100644 Binary files a/console/src/ui/viewer/views/Memories/MemoryCard.tsx and b/console/src/ui/viewer/views/Memories/MemoryCard.tsx differ diff --git 
a/console/src/ui/viewer/views/Memories/MemoryDetailModal.tsx b/console/src/ui/viewer/views/Memories/MemoryDetailModal.tsx index 8a432a86..3a5d6de0 100644 Binary files a/console/src/ui/viewer/views/Memories/MemoryDetailModal.tsx and b/console/src/ui/viewer/views/Memories/MemoryDetailModal.tsx differ diff --git a/console/src/ui/viewer/views/Memories/index.tsx b/console/src/ui/viewer/views/Memories/index.tsx index 087dae02..c750a831 100644 Binary files a/console/src/ui/viewer/views/Memories/index.tsx and b/console/src/ui/viewer/views/Memories/index.tsx differ diff --git a/console/src/ui/viewer/views/Requirements/index.tsx b/console/src/ui/viewer/views/Requirements/index.tsx index 64cc0467..989d475b 100644 Binary files a/console/src/ui/viewer/views/Requirements/index.tsx and b/console/src/ui/viewer/views/Requirements/index.tsx differ diff --git a/console/src/ui/viewer/views/Sessions/SessionCard.tsx b/console/src/ui/viewer/views/Sessions/SessionCard.tsx index ae81ffd8..d3ca203f 100644 Binary files a/console/src/ui/viewer/views/Sessions/SessionCard.tsx and b/console/src/ui/viewer/views/Sessions/SessionCard.tsx differ diff --git a/console/src/ui/viewer/views/Sessions/SessionDetail.tsx b/console/src/ui/viewer/views/Sessions/SessionDetail.tsx new file mode 100644 index 00000000..49b96192 Binary files /dev/null and b/console/src/ui/viewer/views/Sessions/SessionDetail.tsx differ diff --git a/console/src/ui/viewer/views/Sessions/SessionTimeline.tsx b/console/src/ui/viewer/views/Sessions/SessionTimeline.tsx index adaedaed..e1ed1f60 100644 Binary files a/console/src/ui/viewer/views/Sessions/SessionTimeline.tsx and b/console/src/ui/viewer/views/Sessions/SessionTimeline.tsx differ diff --git a/console/src/ui/viewer/views/Sessions/index.tsx b/console/src/ui/viewer/views/Sessions/index.tsx index 3a8f1028..e521b3ce 100644 Binary files a/console/src/ui/viewer/views/Sessions/index.tsx and b/console/src/ui/viewer/views/Sessions/index.tsx differ diff --git 
a/console/src/ui/viewer/views/Settings/ModelSelect.tsx b/console/src/ui/viewer/views/Settings/ModelSelect.tsx index ba4c4549..9daabf79 100644 Binary files a/console/src/ui/viewer/views/Settings/ModelSelect.tsx and b/console/src/ui/viewer/views/Settings/ModelSelect.tsx differ diff --git a/console/src/ui/viewer/views/Settings/index.tsx b/console/src/ui/viewer/views/Settings/index.tsx index ad3816f1..9d72bc15 100644 Binary files a/console/src/ui/viewer/views/Settings/index.tsx and b/console/src/ui/viewer/views/Settings/index.tsx differ diff --git a/console/src/ui/viewer/views/Spec/SpecHeaderCard.tsx b/console/src/ui/viewer/views/Spec/SpecHeaderCard.tsx index 3bf84f6e..271c0e66 100644 Binary files a/console/src/ui/viewer/views/Spec/SpecHeaderCard.tsx and b/console/src/ui/viewer/views/Spec/SpecHeaderCard.tsx differ diff --git a/console/src/ui/viewer/views/Spec/annotation/AnnotationPanel.tsx b/console/src/ui/viewer/views/Spec/annotation/AnnotationPanel.tsx index b2a655e8..fcc129d3 100644 Binary files a/console/src/ui/viewer/views/Spec/annotation/AnnotationPanel.tsx and b/console/src/ui/viewer/views/Spec/annotation/AnnotationPanel.tsx differ diff --git a/console/src/ui/viewer/views/Spec/annotation/BlockRenderer.tsx b/console/src/ui/viewer/views/Spec/annotation/BlockRenderer.tsx index 6e7e239d..13ef0122 100644 Binary files a/console/src/ui/viewer/views/Spec/annotation/BlockRenderer.tsx and b/console/src/ui/viewer/views/Spec/annotation/BlockRenderer.tsx differ diff --git a/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx b/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx index 8287a947..6ee5ac4a 100644 Binary files a/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx and b/console/src/ui/viewer/views/Spec/annotation/PlanAnnotator.tsx differ diff --git a/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts b/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts index d2c4d872..3a929d6a 100644 Binary files 
a/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts and b/console/src/ui/viewer/views/Spec/annotation/useAnnotation.ts differ diff --git a/console/src/ui/viewer/views/Spec/index.tsx b/console/src/ui/viewer/views/Spec/index.tsx index 712aa9d3..3bfdf91a 100644 Binary files a/console/src/ui/viewer/views/Spec/index.tsx and b/console/src/ui/viewer/views/Spec/index.tsx differ diff --git a/console/src/ui/viewer/views/Spec/parsePlanContent.ts b/console/src/ui/viewer/views/Spec/parsePlanContent.ts new file mode 100644 index 00000000..37c56e5a Binary files /dev/null and b/console/src/ui/viewer/views/Spec/parsePlanContent.ts differ diff --git a/console/src/ui/viewer/views/Spec/sharing/ShareDialog.tsx b/console/src/ui/viewer/views/Spec/sharing/ShareDialog.tsx index 100409f6..ac976d5d 100644 Binary files a/console/src/ui/viewer/views/Spec/sharing/ShareDialog.tsx and b/console/src/ui/viewer/views/Spec/sharing/ShareDialog.tsx differ diff --git a/console/src/ui/viewer/views/Spec/sharing/useShare.ts b/console/src/ui/viewer/views/Spec/sharing/useShare.ts index d87bc065..5148f51c 100644 Binary files a/console/src/ui/viewer/views/Spec/sharing/useShare.ts and b/console/src/ui/viewer/views/Spec/sharing/useShare.ts differ diff --git a/console/src/ui/viewer/views/Usage/ModelCostBreakdown.tsx b/console/src/ui/viewer/views/Usage/ModelCostBreakdown.tsx new file mode 100644 index 00000000..ce1a1164 Binary files /dev/null and b/console/src/ui/viewer/views/Usage/ModelCostBreakdown.tsx differ diff --git a/console/src/ui/viewer/views/Usage/ModelRoutingInfo.tsx b/console/src/ui/viewer/views/Usage/ModelRoutingInfo.tsx index ba8fa59b..257f8dfb 100644 Binary files a/console/src/ui/viewer/views/Usage/ModelRoutingInfo.tsx and b/console/src/ui/viewer/views/Usage/ModelRoutingInfo.tsx differ diff --git a/console/src/ui/viewer/views/Usage/PlanPanel.tsx b/console/src/ui/viewer/views/Usage/PlanPanel.tsx new file mode 100644 index 00000000..bf3678ef Binary files /dev/null and 
b/console/src/ui/viewer/views/Usage/PlanPanel.tsx differ diff --git a/console/src/ui/viewer/views/Usage/ToolSavingsCards.tsx b/console/src/ui/viewer/views/Usage/ToolSavingsCards.tsx new file mode 100644 index 00000000..6d7aa47d Binary files /dev/null and b/console/src/ui/viewer/views/Usage/ToolSavingsCards.tsx differ diff --git a/console/src/ui/viewer/views/Usage/UsageSummaryCards.tsx b/console/src/ui/viewer/views/Usage/UsageSummaryCards.tsx index 21dd49f5..74b94d60 100644 Binary files a/console/src/ui/viewer/views/Usage/UsageSummaryCards.tsx and b/console/src/ui/viewer/views/Usage/UsageSummaryCards.tsx differ diff --git a/console/src/ui/viewer/views/Usage/YieldPanel.tsx b/console/src/ui/viewer/views/Usage/YieldPanel.tsx new file mode 100644 index 00000000..f2a66d49 Binary files /dev/null and b/console/src/ui/viewer/views/Usage/YieldPanel.tsx differ diff --git a/console/src/ui/viewer/views/Usage/index.tsx b/console/src/ui/viewer/views/Usage/index.tsx index 9b3f5e8f..964ca5cc 100644 Binary files a/console/src/ui/viewer/views/Usage/index.tsx and b/console/src/ui/viewer/views/Usage/index.tsx differ diff --git a/console/src/utils/logger.ts b/console/src/utils/logger.ts index 9b03ef1a..14cf00b9 100644 --- a/console/src/utils/logger.ts +++ b/console/src/utils/logger.ts @@ -47,7 +47,8 @@ export type Component = | "RETENTION" | "BACKUP" | "CLEANUP" - | "DATA"; + | "DATA" + | "VECTOR_DB_GUARD"; interface LogContext { sessionId?: number; diff --git a/console/src/utils/pilot-spawn.ts b/console/src/utils/pilot-spawn.ts new file mode 100644 index 00000000..1930d0a2 Binary files /dev/null and b/console/src/utils/pilot-spawn.ts differ diff --git a/console/tests/annotation/plan-annotator-persistence.test.tsx b/console/tests/annotation/plan-annotator-persistence.test.tsx new file mode 100644 index 00000000..f66f6cfc Binary files /dev/null and b/console/tests/annotation/plan-annotator-persistence.test.tsx differ diff --git a/console/tests/fixtures/jsonl/empty-session.jsonl 
b/console/tests/fixtures/jsonl/empty-session.jsonl new file mode 100644 index 00000000..f34595ff Binary files /dev/null and b/console/tests/fixtures/jsonl/empty-session.jsonl differ diff --git a/console/tests/fixtures/jsonl/malformed-session.jsonl b/console/tests/fixtures/jsonl/malformed-session.jsonl new file mode 100644 index 00000000..73be61de Binary files /dev/null and b/console/tests/fixtures/jsonl/malformed-session.jsonl differ diff --git a/console/tests/fixtures/jsonl/resumed-session.jsonl b/console/tests/fixtures/jsonl/resumed-session.jsonl new file mode 100644 index 00000000..c7d3a57d Binary files /dev/null and b/console/tests/fixtures/jsonl/resumed-session.jsonl differ diff --git a/console/tests/fixtures/jsonl/valid-session.jsonl b/console/tests/fixtures/jsonl/valid-session.jsonl new file mode 100644 index 00000000..138b388f Binary files /dev/null and b/console/tests/fixtures/jsonl/valid-session.jsonl differ diff --git a/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001-resumed.jsonl b/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001-resumed.jsonl new file mode 100644 index 00000000..ee47b2e9 Binary files /dev/null and b/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001-resumed.jsonl differ diff --git a/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001.jsonl b/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001.jsonl new file mode 100644 index 00000000..03f89518 Binary files /dev/null and b/console/tests/fixtures/usage-jsonl/-tmp-projA/session-001.jsonl differ diff --git a/console/tests/fixtures/usage-jsonl/-tmp-projB/session-002.jsonl b/console/tests/fixtures/usage-jsonl/-tmp-projB/session-002.jsonl new file mode 100644 index 00000000..a05389d4 Binary files /dev/null and b/console/tests/fixtures/usage-jsonl/-tmp-projB/session-002.jsonl differ diff --git a/console/tests/hooks/useUsage.test.ts b/console/tests/hooks/useUsage.test.ts index 0d4ac390..187d64cd 100644 Binary files a/console/tests/hooks/useUsage.test.ts and 
b/console/tests/hooks/useUsage.test.ts differ diff --git a/console/tests/server/license-routes.test.ts b/console/tests/server/license-routes.test.ts index 2123114f..483a6f04 100644 Binary files a/console/tests/server/license-routes.test.ts and b/console/tests/server/license-routes.test.ts differ diff --git a/console/tests/settings-routes.test.ts b/console/tests/settings-routes.test.ts index 45c6c621..31e82d1a 100644 Binary files a/console/tests/settings-routes.test.ts and b/console/tests/settings-routes.test.ts differ diff --git a/console/tests/shared/paths.test.ts b/console/tests/shared/paths.test.ts new file mode 100644 index 00000000..088882c3 Binary files /dev/null and b/console/tests/shared/paths.test.ts differ diff --git a/console/tests/ui/Changes-pickActivePlanPath.test.ts b/console/tests/ui/Changes-pickActivePlanPath.test.ts new file mode 100644 index 00000000..47d7f733 Binary files /dev/null and b/console/tests/ui/Changes-pickActivePlanPath.test.ts differ diff --git a/console/tests/ui/ChangesNavigation.test.ts b/console/tests/ui/ChangesNavigation.test.ts index c4eba8d3..218ff97a 100644 Binary files a/console/tests/ui/ChangesNavigation.test.ts and b/console/tests/ui/ChangesNavigation.test.ts differ diff --git a/console/tests/ui/ChangesView.test.ts b/console/tests/ui/ChangesView.test.ts index 7150e2e5..8ee0f1ab 100644 Binary files a/console/tests/ui/ChangesView.test.ts and b/console/tests/ui/ChangesView.test.ts differ diff --git a/console/tests/ui/CodeReviewPanel-diffFilter.test.ts b/console/tests/ui/CodeReviewPanel-diffFilter.test.ts new file mode 100644 index 00000000..09acc43b Binary files /dev/null and b/console/tests/ui/CodeReviewPanel-diffFilter.test.ts differ diff --git a/console/tests/ui/dashboard-project-filter.test.ts b/console/tests/ui/dashboard-project-filter.test.ts index 7caa2e28..f62254f7 100644 Binary files a/console/tests/ui/dashboard-project-filter.test.ts and b/console/tests/ui/dashboard-project-filter.test.ts differ diff --git 
a/console/tests/ui/model-routing-info.test.ts b/console/tests/ui/model-routing-info.test.ts index 0dc46fa2..2577d09b 100644 Binary files a/console/tests/ui/model-routing-info.test.ts and b/console/tests/ui/model-routing-info.test.ts differ diff --git a/console/tests/ui/notification-bell.test.ts b/console/tests/ui/notification-bell.test.ts index 4f46f875..e3da6456 100644 Binary files a/console/tests/ui/notification-bell.test.ts and b/console/tests/ui/notification-bell.test.ts differ diff --git a/console/tests/ui/project-selector.test.ts b/console/tests/ui/project-selector.test.ts index 01dcc5f6..bce76f64 100644 Binary files a/console/tests/ui/project-selector.test.ts and b/console/tests/ui/project-selector.test.ts differ diff --git a/console/tests/ui/search-removal.test.ts b/console/tests/ui/search-removal.test.ts index 6f0f6468..a2f5e7bc 100644 Binary files a/console/tests/ui/search-removal.test.ts and b/console/tests/ui/search-removal.test.ts differ diff --git a/console/tests/ui/spec-section-rendering.test.ts b/console/tests/ui/spec-section-rendering.test.ts new file mode 100644 index 00000000..180c518f Binary files /dev/null and b/console/tests/ui/spec-section-rendering.test.ts differ diff --git a/console/tests/ui/stats-grid-spec-cards.test.ts b/console/tests/ui/stats-grid-spec-cards.test.ts index a0b38638..4b3f82d5 100644 Binary files a/console/tests/ui/stats-grid-spec-cards.test.ts and b/console/tests/ui/stats-grid-spec-cards.test.ts differ diff --git a/console/tests/ui/terminal-preview-xss.test.ts b/console/tests/ui/terminal-preview-xss.test.ts index fad0233c..223064f7 100644 Binary files a/console/tests/ui/terminal-preview-xss.test.ts and b/console/tests/ui/terminal-preview-xss.test.ts differ diff --git a/console/tests/ui/usage-view.test.ts b/console/tests/ui/usage-view.test.ts index a53f4528..2a142bad 100644 Binary files a/console/tests/ui/usage-view.test.ts and b/console/tests/ui/usage-view.test.ts differ diff --git 
a/console/tests/ui/use-stats-spec-metrics.test.ts b/console/tests/ui/use-stats-spec-metrics.test.ts index e1da0bf2..f339402d 100644 Binary files a/console/tests/ui/use-stats-spec-metrics.test.ts and b/console/tests/ui/use-stats-spec-metrics.test.ts differ diff --git a/console/tests/unit/services/sync/ChromaConnectionManager.test.ts b/console/tests/unit/services/sync/ChromaConnectionManager.test.ts index ecf11b33..fd595a90 100644 Binary files a/console/tests/unit/services/sync/ChromaConnectionManager.test.ts and b/console/tests/unit/services/sync/ChromaConnectionManager.test.ts differ diff --git a/console/tests/unit/services/sync/ChromaConnectionManager.venv.test.ts b/console/tests/unit/services/sync/ChromaConnectionManager.venv.test.ts index 0c584b98..b7e1f350 100644 Binary files a/console/tests/unit/services/sync/ChromaConnectionManager.venv.test.ts and b/console/tests/unit/services/sync/ChromaConnectionManager.venv.test.ts differ diff --git a/console/tests/unit/services/sync/ChromaSync.vacuum.test.ts b/console/tests/unit/services/sync/ChromaSync.vacuum.test.ts index 72fef013..160cfd6b 100644 Binary files a/console/tests/unit/services/sync/ChromaSync.vacuum.test.ts and b/console/tests/unit/services/sync/ChromaSync.vacuum.test.ts differ diff --git a/console/tests/unit/services/sync/VectorDbSizeGuard.test.ts b/console/tests/unit/services/sync/VectorDbSizeGuard.test.ts new file mode 100644 index 00000000..422dd64f Binary files /dev/null and b/console/tests/unit/services/sync/VectorDbSizeGuard.test.ts differ diff --git a/console/tests/unit/services/worker/SessionJsonlService.test.ts b/console/tests/unit/services/worker/SessionJsonlService.test.ts new file mode 100644 index 00000000..d08b59da Binary files /dev/null and b/console/tests/unit/services/worker/SessionJsonlService.test.ts differ diff --git a/console/tests/unit/services/worker/usage/aggregator.test.ts b/console/tests/unit/services/worker/usage/aggregator.test.ts new file mode 100644 index 00000000..9039ea84 
Binary files /dev/null and b/console/tests/unit/services/worker/usage/aggregator.test.ts differ diff --git a/console/tests/unit/services/worker/usage/classifier.test.ts b/console/tests/unit/services/worker/usage/classifier.test.ts new file mode 100644 index 00000000..c78bf179 Binary files /dev/null and b/console/tests/unit/services/worker/usage/classifier.test.ts differ diff --git a/console/tests/unit/services/worker/usage/claude-parser.test.ts b/console/tests/unit/services/worker/usage/claude-parser.test.ts new file mode 100644 index 00000000..de52f498 Binary files /dev/null and b/console/tests/unit/services/worker/usage/claude-parser.test.ts differ diff --git a/console/tests/unit/services/worker/usage/cross-view-parity.test.ts b/console/tests/unit/services/worker/usage/cross-view-parity.test.ts new file mode 100644 index 00000000..9d4bb33b Binary files /dev/null and b/console/tests/unit/services/worker/usage/cross-view-parity.test.ts differ diff --git a/console/tests/unit/services/worker/usage/plan.test.ts b/console/tests/unit/services/worker/usage/plan.test.ts new file mode 100644 index 00000000..ae00406b Binary files /dev/null and b/console/tests/unit/services/worker/usage/plan.test.ts differ diff --git a/console/tests/unit/services/worker/usage/pricing.test.ts b/console/tests/unit/services/worker/usage/pricing.test.ts new file mode 100644 index 00000000..2cecd68e Binary files /dev/null and b/console/tests/unit/services/worker/usage/pricing.test.ts differ diff --git a/console/tests/utils/pilot-spawn.test.ts b/console/tests/utils/pilot-spawn.test.ts new file mode 100644 index 00000000..88a83e3c Binary files /dev/null and b/console/tests/utils/pilot-spawn.test.ts differ diff --git a/console/tests/worker/active-specs-route.test.ts b/console/tests/worker/active-specs-route.test.ts new file mode 100644 index 00000000..c360a4d2 Binary files /dev/null and b/console/tests/worker/active-specs-route.test.ts differ diff --git 
a/console/tests/worker/extension-routes.test.ts b/console/tests/worker/extension-routes.test.ts index c6635748..ab8f417e 100644 Binary files a/console/tests/worker/extension-routes.test.ts and b/console/tests/worker/extension-routes.test.ts differ diff --git a/console/tests/worker/license-routes.test.ts b/console/tests/worker/license-routes.test.ts index 4f89ac4a..7958ab7c 100644 Binary files a/console/tests/worker/license-routes.test.ts and b/console/tests/worker/license-routes.test.ts differ diff --git a/console/tests/worker/notification-routes.test.ts b/console/tests/worker/notification-routes.test.ts index 8b51ecc1..78bbb84b 100644 Binary files a/console/tests/worker/notification-routes.test.ts and b/console/tests/worker/notification-routes.test.ts differ diff --git a/console/tests/worker/plan-author-parsing.test.ts b/console/tests/worker/plan-author-parsing.test.ts index bcbbcd7e..2de895cd 100644 Binary files a/console/tests/worker/plan-author-parsing.test.ts and b/console/tests/worker/plan-author-parsing.test.ts differ diff --git a/console/tests/worker/usage-routes.test.ts b/console/tests/worker/usage-routes.test.ts index 01c4bc63..94fca6ef 100644 Binary files a/console/tests/worker/usage-routes.test.ts and b/console/tests/worker/usage-routes.test.ts differ diff --git a/console/tests/worker/utils/ChromaSync.deleteDocuments.test.ts b/console/tests/worker/utils/ChromaSync.deleteDocuments.test.ts index 359cded4..ad9e87d6 100644 Binary files a/console/tests/worker/utils/ChromaSync.deleteDocuments.test.ts and b/console/tests/worker/utils/ChromaSync.deleteDocuments.test.ts differ diff --git a/console/tsconfig.json b/console/tsconfig.json index 7f33f1b0..3a52a1ca 100644 --- a/console/tsconfig.json +++ b/console/tsconfig.json @@ -1,5 +1,6 @@ { "compilerOptions": { + "ignoreDeprecations": "5.0", "target": "ES2022", "lib": ["ES2023", "DOM", "DOM.Iterable"], "module": "ESNext", @@ -24,7 +25,8 @@ "include": [ "src/**/*.ts", "src/**/*.tsx", - "tests/**/*.ts" + 
"tests/**/*.ts", + "tests/**/*.tsx" ], "exclude": [ "node_modules", diff --git a/docs/docusaurus/docs/features/bot.md b/docs/docusaurus/docs/features/bot.md new file mode 100644 index 00000000..08aac38c --- /dev/null +++ b/docs/docusaurus/docs/features/bot.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 2 +--- + +# Pilot Bot + +Persistent automation agent — scheduled tasks, background jobs, heartbeat monitoring, 24/7 operation. Always runs on Sonnet for cost-effective automation. + +```bash +pilot bot +``` + +Auto-initializes `~/.pilot/bot/` on first run. Only one global instance at a time (PID-enforced). Uses `--continue` to resume previous sessions. + +## Optional: Telegram + +Install the [Telegram Channels plugin](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/telegram) to enable bidirectional messaging. Pilot Bot auto-detects it at launch — no extra configuration needed. Without Telegram, the bot works as a standalone automation tool. + +## Skills + +| Skill | Purpose | +|-------|---------| +| **bot-boot** | Boot sequence — health check, job registration via CronCreate, heartbeat setup | +| **bot-heartbeat** | Periodic health check (every 30 min), silent when no issues, dedup via lock file | +| **bot-jobs** | Manage scheduled jobs — `list`, `add`, `remove`, `pause`, `resume`, `edit` | +| **bot-channel-task** | Channel message flow — acknowledge, execute, report (when Telegram available) | +| **bot-defaults** | Standard behaviors — cron deduplication, reporting rules, error handling | + +## Config + +``` +~/.pilot/bot/ +├── .bot-pid # PID file (managed automatically) +└── JOBS.yaml # Scheduled job definitions (auto-created) +``` + +Jobs persist in `JOBS.yaml`. CronCreate registrations are session-scoped and re-registered on every boot. 
diff --git a/docs/docusaurus/docs/features/cli.md b/docs/docusaurus/docs/features/cli.md index 04e7491a..4cc7ebab 100644 --- a/docs/docusaurus/docs/features/cli.md +++ b/docs/docusaurus/docs/features/cli.md @@ -10,12 +10,73 @@ Command reference for the `pilot` binary at `~/.pilot/bin/pilot`. Run `pilot` or `ccp` with no arguments to start Claude with Pilot enhancements. Most commands support `--json` for structured output. Multiple sessions can run in parallel on the same project. -## Claude CLI Flag Passthrough +## Session & context -Pilot forwards any unrecognized flags directly to the Claude CLI. This means all current and future Claude Code flags work out of the box — no Pilot update required. +| Command | Description | +|---------|-------------| +| `pilot` | Start Claude with Pilot enhancements, auto-update, and license check | +| `pilot [claude-flags...]` | Start Claude with any Claude CLI flags passed through | +| `pilot -p "prompt" [flags...]` | Headless mode — non-interactive for CI/CD, scripts | +| `pilot run [flags...]` | Explicit alias for starting Claude | +| `ccp` | Alias for `pilot` | +| `pilot check-context --json` | Get current context usage percentage | +| `pilot register-plan <path> <status>` | Associate a plan file with the current session | +| `pilot sessions [--json]` | Show count of active Pilot sessions | +| `pilot statusline` | Run the status line formatter (called by Claude Code) | +| `pilot notify <message> [--plan-path PATH] [--json]` | Send a notification to the Console dashboard (type: `info`, `plan_approval`, `attention_needed`, `verification_complete`) | +| `pilot skill-build <skill-dir> [--output <path>] [--dry-run] [--json]` | Build `SKILL.md` and `hashes.json` from a skill's manifest + fragments | +| `pilot --version` | Show Pilot Shell version | + +## Bot mode + +| Command | Description | +|---------|-------------| +| `pilot bot` | Launch Pilot Bot — persistent automation session with scheduled tasks, background jobs, and optional Telegram | + 
+## Worktree isolation + +| Command | Description | +|---------|-------------| +| `pilot worktree create --json <slug>` | Create isolated git worktree | +| `pilot worktree detect --json <slug>` | Check if a worktree already exists | +| `pilot worktree diff --json <slug>` | List changed files in the worktree | +| `pilot worktree sync --json <slug>` | Squash merge worktree changes back to base branch | +| `pilot worktree cleanup --json <slug>` | Remove worktree and branch (`--force` to skip checks, `--discard` to drop changes) | +| `pilot worktree status --json` | Show active worktree info for current session | + +:::info Slug format +The `<slug>` for worktree commands is the plan filename without the date prefix and `.md` extension. Example: `docs/plans/2026-02-22-add-auth.md` → `add-auth`. +::: + +## License & auth + +| Command | Description | +|---------|-------------| +| `pilot activate <key>` | Activate a license key on this machine | +| `pilot deactivate` | Deactivate license on this machine | +| `pilot status [--json]` | Show current license status and tier | +| `pilot verify [--json]` | Verify license validity (used by hooks) | +| `pilot trial --check [--json]` | Check trial eligibility for this machine | +| `pilot trial --start [--json]` | Start a trial (one-time per machine) | + +## Customization (Team / Enterprise) + +Compose custom steps into core workflow skills and ship team rules, hooks, and agents. Source is either a git URL (team-wide) or a local directory (personal). See [Customization](/docs/features/customization) for the full overlay schema. + +| Command | Description | +|---------|-------------| +| `pilot customize install <source> [--branch <b>] [--subfolder <p>] [--json]` | Install and apply. `<source>` = git URL or local directory path. `--branch` applies to git sources only. 
| +| `pilot customize update [--json]` | Re-apply — pulls git sources, reads local sources in place | +| `pilot customize status [--json]` | Show active source, file counts, and drift warnings | +| `pilot customize diff <skill>/<step-id> [--json]` | Unified diff between pinned replacement and current upstream | +| `pilot customize remove [--json]` | Delete pack files and regenerate pristine `SKILL.md` | + +## Claude CLI flag passthrough + +Pilot forwards any unrecognized flags directly to the Claude CLI — all current and future Claude Code flags work out of the box, no Pilot update required. ```bash -# Use any Claude CLI flag directly +# Any Claude CLI flag works directly pilot --channels plugin:telegram@claude-plugins-official pilot --model opus --verbose pilot --resume @@ -27,7 +88,7 @@ pilot run --channels plugin:telegram@claude-plugins-official Pilot only intercepts its own subcommands (`activate`, `status`, `worktree`, etc.) and flags (`--version`, `--skip-update-check`). Everything else passes through to `claude`. -## Headless Mode +## Headless mode Run Pilot non-interactively with `-p` (or `--print`). Wraps `claude -p` with license validation and the Pilot plugin — use it in CI/CD pipelines, scripts, or automated workflows. @@ -52,45 +113,3 @@ pilot -p "Summarize this file" --bare --allowedTools "Read" ``` All [Claude Code CLI flags](https://code.claude.com/docs/en/cli-reference) work with `-p`, including `--output-format`, `--allowedTools`, `--continue`, `--resume`, `--channels`, `--append-system-prompt`, `--json-schema`, and `--bare`. Pilot-specific flags like `--skip-update-check` are stripped automatically. 
- -## Session & Context - -| Command | Description | -|---------|-------------| -| `pilot` | Start Claude with Pilot enhancements, auto-update, and license check | -| `pilot [claude-flags...]` | Start Claude with any Claude CLI flags passed through | -| `pilot -p "prompt" [flags...]` | Headless mode — run non-interactively with Pilot plugin (CI/CD, scripts) | -| `pilot run [flags...]` | Explicit alias for starting Claude (all flags passed through) | -| `ccp` | Alias for pilot | -| `pilot check-context --json` | Get current context usage percentage | -| `pilot register-plan <path> <status>` | Associate a plan file with the current session | -| `pilot sessions [--json]` | Show count of active Pilot sessions | -| `pilot statusline` | Run the status line formatter (called by Claude Code) | -| `pilot notify <event> [data]` | Send a notification to the Console dashboard | -| `pilot --version` | Show Pilot Shell version | - -## Worktree Isolation - -| Command | Description | -|---------|-------------| -| `pilot worktree create --json <slug>` | Create isolated git worktree | -| `pilot worktree detect --json <slug>` | Check if a worktree already exists | -| `pilot worktree diff --json <slug>` | List changed files in the worktree | -| `pilot worktree sync --json <slug>` | Squash merge worktree changes back to base branch | -| `pilot worktree cleanup --json <slug>` | Remove worktree and branch (`--force` to skip checks, `--discard` to drop changes) | -| `pilot worktree status --json` | Show active worktree info for current session | - -## License & Auth - -| Command | Description | -|---------|-------------| -| `pilot activate <key>` | Activate a license key on this machine | -| `pilot deactivate` | Deactivate license on this machine | -| `pilot status [--json]` | Show current license status and tier | -| `pilot verify [--json]` | Verify license validity (used by hooks) | -| `pilot trial --check [--json]` | Check trial eligibility for this machine | -| `pilot trial --start 
[--json]` | Start a trial (one-time per machine) | - -:::info Slug format -The `<slug>` parameter for worktree commands is the plan filename without the date prefix and `.md` extension. For example, `docs/plans/2026-02-22-add-auth.md` → `add-auth`. -::: diff --git a/docs/docusaurus/docs/features/console.md b/docs/docusaurus/docs/features/console.md index 15686805..e8b17da7 100644 --- a/docs/docusaurus/docs/features/console.md +++ b/docs/docusaurus/docs/features/console.md @@ -8,167 +8,147 @@ description: Local web dashboard at localhost:41777 — monitor and manage your Local web dashboard at `localhost:41777` — monitor and manage your sessions. -The Console runs locally as a Bun/Express server with a React web UI. It's automatically started when you launch Pilot and stopped when all sessions close. All data — memories, sessions, usage — is stored in a local SQLite database. Nothing leaves your machine. +The Console runs locally as a Bun/Express server with a React web UI. It starts when you launch Pilot and stops when all sessions close. All data — memories, sessions, usage — lives in a local SQLite database. Nothing leaves your machine. ```bash $ open http://localhost:41777 ``` -## 9 Views +## Views + +Each view that supports project filtering has an inline **Project Filter** dropdown next to the title. The **Dashboard** shows stats across all projects with clickable tiles that navigate to the relevant view. | View | Description | |------|-------------| -| **Dashboard** | Workspace status, active sessions, spec progress, git info, recent activity. Your real-time command center. | -| **Specifications** | All spec plans with task progress (checkboxes), phase tracking (PENDING/COMPLETE/VERIFIED), and iteration history. | -| **Extensions** | All extensions — local, plugin, and remote — with team sharing via git (push, pull, diff), color-coded categories, and scope filtering (All / Global / Project / Plugin / Remote). 
| -| **Changes** | Git diff viewer with staged/unstaged files, branch info, and worktree context. | -| **Memories** | Browsable observations — decisions, discoveries, bugfixes — with type filters, search, and timeline view. | -| **Sessions** | Active and past sessions with observation counts, duration, and the ability to browse session context. | -| **Usage** | Daily token costs, model routing breakdown (Opus vs Sonnet distribution), and usage trends over time. | -| **Settings** | Model selection per command and sub-agent (Sonnet 4.6 vs Opus 4.6). Spec workflow toggles (worktree support, ask questions, plan approval). Reviewer toggles (spec review, changes review) and optional Codex adversarial reviewers. Extended context (1M) toggle. | -| **Help** | Embedded documentation from pilot-shell.com — full technical reference without leaving the Console. | +| **Dashboard** | Global command center — 8 clickable stat cards (Projects, Sessions, Active, Memories, Extensions, Requirements, Specifications, Changes), 4 recent-item cards with "Show all" links, active specs as pills in the top bar, notification bell in the top right. | +| **Sessions** | Browse past sessions with search. Copy a session ID and run `/resume <session-id>` to jump back in — all context, files, and conversation history restored. | +| **Memories** | Observations (decisions, discoveries, bugfixes) with type filters and search. Each memory links back to the session it came from. | +| **Requirements** | PRD documents with view/annotate modes. Selected opens as a tab, others live in a Previous dropdown. | +| **Specifications** | Spec plans with task progress, phase tracking (PENDING/COMPLETE/VERIFIED), and iteration history. Hosts Plan Annotation and Spec Sharing (below). | +| **Extensions** | All extensions — local, plugin, remote — with team sharing via git (push, pull, diff), color-coded categories, and scope filtering. 
| +| **Changes** | Git diff viewer with staged/unstaged files, branch info, worktree context. Hosts Code Review and Spec Task Correlation (below). | +| **Usage** | Daily token costs, model routing breakdown (Opus vs Sonnet), and usage trends. | +| **Help** | Embedded pilot-shell.com documentation — full technical reference without leaving the Console. | +| **Settings** | Model selection, spec workflow toggles, reviewer toggles, extended context toggle. See [Settings](#settings) below. | -## Plan Annotation & Code Review +## Plan Annotation -The Console provides two live annotation mechanisms that let you shape what gets built and verify what was built — without leaving the browser. Annotations save automatically as you write them; the agent reads them directly at review checkpoints. +When a spec plan is in the planning phase (PENDING, not yet approved), the Specifications tab auto-opens in **Annotate mode**. Toggle between View and Annotate using the control next to the "Specifications" heading. -### Plan Annotation +Select any passage and write a free-text note in the popover that appears — no type selection, no submit button. Annotations save immediately and appear in the sidebar panel, where you can edit or delete them. -When a spec plan is in the planning phase (PENDING, not yet approved), the Specifications tab automatically opens in **Annotate mode**. You can also toggle between View and Annotate modes using the prominent toggle next to the "Specifications" heading. +When the agent reaches the approval checkpoint, it reads your annotations directly from the Console, incorporates every note into the plan, and asks for approval again. Just write your notes and say "ready" when done. -In Annotate mode, the entire plan is rendered as selectable text. Select any passage and write a free-text note in the popover that appears. That's it — no type selection, no submit button. Your annotation is immediately saved and visible in the sidebar panel. 
+## Code Review -The sidebar shows all your annotations with the selected text and your note. You can edit or delete any annotation at any time. +After a spec completes automated verification, the agent prompts you to review the code changes. Switch to the **Changes** tab and enable **Review mode** using the toggle next to the "Changes" heading. -When the agent reaches the approval checkpoint, it reads your annotations directly from the Console, incorporates every note into the plan, and asks for approval again. You don't need to do anything — just write your notes and say "ready" when done. +In Review mode, a **+** button appears on hover for every diff line. Click it to open an inline annotation form — write your note and press Save. Annotations appear in a panel at the bottom of the diff viewer. -### Code Review +The agent reads your code-review annotations directly from the Console before marking the spec verified. Say "fix" to have it address them, "approve" to mark the spec as verified. Annotations persist across page reloads, so you can review asynchronously while the agent runs verification in the background. -After a spec completes all automated verification checks, the agent prompts you to review the code changes before marking the spec as verified. The **Changes** tab is located right next to Specifications in the sidebar — switch there and enable **Review mode** using the toggle next to the "Changes" heading. +## Spec Task Correlation -In Review mode, a **+** button appears on hover for every diff line. Click it to open an inline annotation form below that line — write your note and press Save. The annotation appears in the panel at the bottom of the diff viewer. +When a `/spec` task is active, the Changes tab correlates each changed file with the spec task that touched it — instant traceability. -The agent reads your code review annotations directly from the Console before marking the spec as verified. 
Say "fix" to have it address your annotations, or "approve" to mark the spec as verified. +- Each file in the file list shows a **T{N}** badge (e.g., `T1`, `T3`) linking it to the matching spec task +- Hover the badge for the full task name +- Click the **Spec** button to switch to **group-by-spec** view — files organized by spec name and task number +- Correlation is parsed from the `**Files:**` section of each task, so any spec following the standard format works automatically -Annotations persist across page reloads, so you can review asynchronously while the agent runs verification in the background. +Especially useful for multi-task specs: instead of scrolling a flat file list, review changes task by task. -### Spec Sharing +## Spec Sharing -Share specifications with teammates for collaborative review — no cloud service required. Everything works entirely locally with compressed URLs. +Share specs with teammates for collaborative review — no cloud service required. Everything works locally with compressed URLs. -**Sharing a spec:** +**Share:** -1. Open a spec in the Specifications tab -2. Click **Share with Teammate** in the metadata row -3. A share URL is generated — the spec content and your annotations are compressed and encoded in the URL fragment (never sent to any server) -4. Copy the URL and send it to your colleague via Slack, email, or any channel -5. The **Receive Feedback** dialog opens automatically so you're ready to import their response +1. Open a spec, click **Share with Teammate** in the metadata row +2. A share URL is generated — the spec content and your annotations are compressed into the URL fragment (per the HTTP spec, fragments are never sent to any server) +3. Copy the URL and send it via Slack, email, or any channel +4. The **Receive Feedback** dialog opens automatically so you're ready for their response -**Reviewing shared specs:** +**Review a shared spec:** -1. 
Your colleague opens the URL in their Pilot Console (`localhost:41777`) -2. They see the full spec with your annotations displayed as read-only highlights -3. They can add their own feedback — either by selecting text or clicking the **+** button on any block -4. Click **Send Feedback** to generate a feedback URL and copy it to clipboard +1. Your colleague opens the URL in their Pilot Console +2. They see your spec and annotations as read-only highlights +3. They add their own feedback via text selection or the **+** button on any block +4. Click **Send Feedback** to generate a feedback URL -**Importing feedback:** +**Import feedback:** 1. Click **Receive Feedback** on the original spec -2. Paste the feedback URL — a preview shows the incoming annotations -3. Import adds annotations with `pending` status to your annotation panel -4. **Accept** or **Reject** each annotation individually, or use **Accept All** / **Reject All** -5. The view auto-switches to Annotate mode so you see the imported feedback immediately - -**Deduplication:** Importing the same feedback twice is safe — annotations matching existing ones (same text and selection) are automatically skipped. +2. Paste the URL — a preview shows the incoming annotations +3. **Accept** or **Reject** each annotation individually, or use **Accept All** / **Reject All** -**Privacy:** All shared data lives in the URL fragment, which per the HTTP spec is never sent to any server — no data reaches pilot-shell.com or any third party. For specs larger than ~32KB compressed, an embedded paste service stores the compressed data locally in `~/.pilot/share/` with automatic 3-day expiry. +Importing the same feedback twice is safe — annotations matching existing ones are skipped. For specs larger than ~32KB compressed, an embedded paste service stores the data locally in `~/.pilot/share/` with 3-day auto-expiry. 
:::tip Both annotation methods work everywhere -The **+** button on each block and text selection both work on the normal review page and the shared spec feedback page. Use whichever is more convenient — the **+** button is more reliable for quick block-level comments. +The **+** button and text selection both work on the normal review page and on shared feedback pages. The **+** button is more reliable for quick block-level comments. ::: -## Smart Notifications via SSE +## Notifications -The Console sends real-time alerts via Server-Sent Events when Claude needs your input or a significant phase completes. You don't need to watch the terminal constantly — the Console notifies you. +The Console sends real-time alerts via Server-Sent Events when Claude needs your input or a significant phase completes — no need to watch the terminal. -- Plan requires your approval — review and respond in the terminal or via notification +- Plan requires your approval — review and respond - Spec phase completed — implementation done, verification starting -- Clarification needed — Claude is waiting for design decisions before proceeding +- Clarification needed — Claude is waiting for design decisions - Session ended — completion summary with observation count ## Settings -The Settings tab (`localhost:41777/#/settings`) controls how Pilot Shell behaves. Changes are saved to `~/.pilot/config.json` and take effect after restarting Claude Code. - -### Model Preferences +The Settings tab (`localhost:41777/#/settings`) controls Pilot Shell behavior. Changes save to `~/.pilot/config.json` and take effect after restarting Claude Code. -Choose between **Sonnet 4.6** ($3/$15 per MTok) and **Opus 4.6** ($5/$25 per MTok) for each component independently. +### Model preferences -#### General +Choose between **Sonnet 4.6** ($3/$15 per MTok) and **Opus 4.7** ($5/$25 per MTok) independently per component. 
-| Setting | Default | Description | -|---------|---------|-------------| +| Component | Default | Scope | +|-----------|---------|-------| | **Main Session** | Opus | Quick mode and direct chat | - -#### Spec Phases - -| Phase | Default | Description | -|-------|---------|-------------| | **Planning** | Opus | Codebase exploration, architecture design, plan writing | | **Implementation** | Sonnet | TDD loop — write test, write code, verify | | **Verification** | Sonnet | Test execution, code review orchestration | -#### Extended Context (1M) - -Toggle for using the 1M token context window instead of 200K. API subscribers (Team, Enterprise) get this at no additional cost with all models. Max plan users must set all models to Opus for 1M to work — Sonnet 1M is not included in Max. - -### Spec Workflow - -#### Review Agents - -Two independent sub-agents that run in separate context windows during `/spec`: - -| Agent | Default | Description | -|-------|---------|-------------| -| **Spec Review** | On | Validates plans before implementation. Checks alignment with requirements and flags risky assumptions. | -| **Changes Review** | On | Reviews code after implementation. Checks compliance, security, test coverage, and goal achievement. Reads all changed files. | - -Each agent has its own model selector (Sonnet or Opus). Disabling an agent skips it entirely — no tokens consumed. +**Custom model IDs** — each dropdown also offers a **Custom…** option. Selecting it reveals a text input where you can pin an explicit Anthropic model ID such as `claude-opus-4-6`, `claude-opus-4-5`, or `claude-sonnet-4-5-20250929`. Useful for reproducibility, team standardization, or falling back to an older model when a newer release mis-triggers content filters on legitimate code. The value is passed through to Claude Code verbatim. -#### Codex Reviewers (Optional) +**Extended Context (1M):** toggle for the 1M token context window instead of 200K. 
API subscribers (Team, Enterprise) get this at no additional cost with all models. Max plan users must set all models to Opus — Sonnet 1M is not included in Max. The toggle only applies to the `sonnet` and `opus` aliases; custom model IDs are sent verbatim, so pick the concrete ID for the context window you want. -Adversarial review agents powered by OpenAI Codex that provide an independent second opinion: +### Review agents -| Agent | Default | Description | -|-------|---------|-------------| -| **Codex Spec Review** | Off | Adversarial plan review — provides an independent second opinion on plans. | -| **Codex Changes Review** | Off | Adversarial code review — provides an independent second opinion on implementations. | +Two Claude sub-agents run in separate context windows during `/spec`. Each has its own model selector; disabling an agent skips it entirely. -#### Automation +| Agent | Default | Role | +|-------|---------|------| +| **Spec Review** | On | Validates plans before implementation. Checks alignment with requirements, flags risky assumptions. | +| **Changes Review** | On | Reviews code after implementation. Checks compliance, security, test coverage, goal achievement. | -Three toggles that control user interaction points during `/spec`. Disable all three for fully autonomous operation. +**Codex adversarial reviewers (optional)** — OpenAI Codex agents that provide an independent second opinion. 
-| Toggle | Default | When enabled | When disabled | -|--------|---------|-------------|---------------| -| **Worktree Support** | On | Asks whether to isolate changes in a git worktree at the start of `/spec` | Worktree is always skipped — changes go directly on the current branch | -| **Ask Questions** | On | Asks clarifying questions during planning to resolve ambiguities | Planning runs fully autonomous — makes default choices without asking | -| **Plan Approval** | On | Requires your approval before implementation starts | Implementation begins automatically after planning completes | +| Agent | Default | Role | +|-------|---------|------| +| **Codex Spec Review** | Off | Adversarial plan review — second opinion before implementation. | +| **Codex Changes Review** | Off | Adversarial code review — second opinion after implementation. | -#### Fully Autonomous Mode +### Automation toggles -To make `/spec` run end-to-end without any user interaction: +Three toggles control user interaction points during `/spec`. Disable all three for fully autonomous end-to-end execution. -1. Disable **Worktree Support** — skips the worktree prompt -2. Disable **Ask Questions** — planning makes autonomous decisions -3. Disable **Plan Approval** — implementation starts automatically +| Toggle | Default | Enabled | Disabled | +|--------|---------|---------|----------| +| **Worktree Support** | On | Asks how to handle branching at `/spec` start | Skips the branch question — changes go on the current branch | +| **Ask Questions** | On | Asks clarifying questions during planning | Planning makes autonomous default choices | +| **Plan Approval** | On | Requires your approval before implementation starts | Implementation begins automatically after planning | -With all three off, typing `/spec add user authentication` will plan, implement, and verify the feature completely autonomously. You can review the output when it's done. 
+With all three off, `/spec add user authentication` plans, implements, and verifies the feature end-to-end without checkpoints. -:::warning Token usage -Fully autonomous mode means no checkpoints — Claude will execute the entire workflow without asking. Make sure your prompt is specific enough to avoid misinterpretation. You can always interrupt with Escape. +:::warning Token usage in autonomous mode +No checkpoints means Claude executes the entire workflow without asking. Make sure your prompt is specific enough to avoid misinterpretation. You can always interrupt with Escape. ::: -### Config File +### Config file All settings are stored in `~/.pilot/config.json`: @@ -204,4 +184,4 @@ All settings are stored in `~/.pilot/config.json`: } ``` -You can edit this file directly — the Console Settings UI is a convenience wrapper. Changes require a Claude Code restart to take effect. +You can edit this file directly — the Settings UI is a convenience wrapper. Changes require a Claude Code restart. diff --git a/docs/docusaurus/docs/features/context-optimization.md b/docs/docusaurus/docs/features/context-optimization.md index e902c157..9923bc88 100644 --- a/docs/docusaurus/docs/features/context-optimization.md +++ b/docs/docusaurus/docs/features/context-optimization.md @@ -1,54 +1,63 @@ --- sidebar_position: 3 title: Context Optimization -description: Strategies for maximizing effective context usage across the 200K and 1M context windows +description: Keep the context window lean and recover cleanly when it fills up --- # Context Optimization -Strategies for maximizing effective context usage. +Two things matter for a long-running Claude session: keeping the context window lean so tokens go to your code, and handling the moments when it fills up anyway. -With 1M context windows (API subscribers on Team and Enterprise get this with all models; Max plan users must set all models to Opus), compaction is rare — most sessions complete well within the available context. 
Pilot Shell's optimization strategies focus on **keeping context lean** so Claude spends tokens on your code, not on overhead. +With 1M context windows (API subscribers on Team and Enterprise get this on all models; Max plan users must set all models to Opus), compaction is rare — most sessions complete well within the available context. Pilot Shell's strategies focus on **staying lean** and on making **compaction and parallel work** painless when they occur. -## Token Reduction - -Pilot Shell reduces context consumption at multiple levels: +## Keeping context lean | Strategy | Savings | How | |----------|---------|-----| +| **context-mode sandbox** | Up to 98% | Routes large-output commands to a sandboxed executor — only your printed summary enters context. An FTS5 knowledge base indexes content for on-demand search. Blocks curl/wget/WebFetch entirely. | | **RTK proxy** | 60–90% | Rewrites dev tool output (`git status`, `npm test`, etc.) to remove noise before it enters the context window | | **Conditional rule loading** | Variable | Coding standards load only for matching file types — Python rules don't load when editing TypeScript | | **Progressive skill disclosure** | ~90% | Skill frontmatter (~100 tokens) loads always; full SKILL.md loads only on activation; linked files load on demand | | **Scoped MCP tools** | Variable | MCP tool schemas are lazy-loaded via `ToolSearch` — only fetched when needed, not preloaded | -## Context Display + +### context-mode in detail + +context-mode is the biggest single source of context savings. When a command would dump 50KB+ of output into your context window, context-mode intercepts it and runs it in a sandbox. The tool hierarchy: + +1. **`ctx_batch_execute`** — Run multiple commands + search in one call. Replaces 30+ individual tool calls. +2. **`ctx_search`** — Query indexed content. Pass all questions as an array in one call. +3. **`ctx_execute` / `ctx_execute_file`** — Run code in sandbox (JS, Python, shell). Only stdout enters context.
+ +PreToolUse hooks automatically guide Claude toward these tools when it attempts commands that would produce large output. Session continuity hooks (PostToolUse, PreCompact, SessionStart) track 13 event categories across compactions. + +## Status line display The status line shows context usage as a visual progress bar: ``` -Opus 4.6 [1M] | █████░▓ 60% | ... +Opus 4.7 [1M] | █████░▓ 60% | ... ``` -Claude Code reserves ~16.5% of the context window as a compaction buffer, triggering auto-compaction at ~83.5% raw usage. Pilot Shell rescales this to an **effective 0–100% range** so the bar fills naturally to 100% right before compaction fires. A `▓` buffer indicator shows the reserved zone. The context monitor warns at ~80% effective (informational) and ~90%+ effective (caution). +Claude Code reserves ~16.5% of the context window as a compaction buffer, triggering auto-compaction at ~83.5% raw usage. Pilot Shell rescales this to an **effective 0–100% range** so the bar fills naturally to 100% right before compaction fires. A `▓` indicator shows the reserved zone. The monitor warns at ~80% effective (informational) and ~90%+ effective (caution). -## Compaction Resilience +## When compaction fires -When compaction does fire (more common on 200K windows), Pilot Shell preserves state automatically: +On 200K windows, compaction happens more often. Pilot Shell preserves state automatically across the three lifecycle events: ``` PreCompact → Compact → SessionStart(compact) ``` 1. **PreCompact** — `pre_compact.py` captures active plan, task list, recent decisions, and key context to Pilot Shell Console memory. -2. **Compact** — Claude Code summarizes conversation history. Preserves recent tool calls and conversation flow. -3. **SessionStart(compact)** — `post_compact_restore.py` re-injects Pilot context: active plan path, task state, key decisions. Work resumes seamlessly. +2. 
**Compact** — Claude Code summarizes conversation history while preserving recent tool calls and flow. +3. **SessionStart(compact)** — `post_compact_restore.py` re-injects the active plan path, task state, and key decisions. Work resumes seamlessly. Memory observations (decisions, discoveries, bugfixes) persist independently in SQLite — they survive compaction regardless of hooks. -## Parallel Sessions +:::tip Don't rush the current task +Context limits are not an emergency — auto-compaction preserves everything and resumes cleanly. Finish the current task with full quality. The only thing that matters is the output, not the context percentage. +::: -Multiple Pilot Shell sessions can run on the same project without interference. Each session has its own context window, task list, and plan state. The Console dashboard tracks all active sessions. +## Running parallel sessions -:::tip Never rush due to context warnings -Context limits are not an emergency — auto-compaction preserves everything and resumes seamlessly. Finish the current task with full quality. The only thing that matters is the output, not the context percentage. -::: +Multiple Pilot Shell sessions can run on the same project without interference. Each session has its own context window, task list, and plan state. The Console dashboard tracks every active session so you can jump between them. diff --git a/docs/docusaurus/docs/features/customization.md b/docs/docusaurus/docs/features/customization.md new file mode 100644 index 00000000..37db4704 --- /dev/null +++ b/docs/docusaurus/docs/features/customization.md @@ -0,0 +1,188 @@ +--- +sidebar_position: 10 +title: Customization +description: Customize what Pilot Shell auto-installs — skills, rules, hooks, agents, MCP servers, LSP servers, and Claude settings +--- + +# Customization + +Customize everything Pilot Shell auto-installs on your machine. 
Tweak the built-in `/spec` workflow, modify existing rules, register additional hooks on top of Pilot's defaults, change which MCP or LSP servers get configured, adjust the auto-applied `settings.json` and `claude.json` — all without forking Pilot or hand-editing `~/.claude/` after every update. Available on **Team** and **Enterprise** plans. + +- **Team-wide (git URL):** publish your customization as a git repo; every developer runs `pilot customize install <git-url>` once, and `pilot customize update` pulls your team's latest. +- **Individual (local path):** drop the same files into a folder on your machine (e.g. `~/my-pilot-patch/`) and run `pilot customize install ~/my-pilot-patch`. `pilot customize update` re-applies directly from the same folder, so your edits take effect on the next update. + +## What you can customize + +| Target | What lives in the repo | How it composes | +|--------|------------------------|-----------------| +| **Skills** | `skills/<name>/...` + optional overlay ops in `customization.json` | Overlay ops (`insert_after`, `insert_before`, `replace`, `disable`) modify Pilot's built-in workflow skills (e.g. `/spec`, `/prd`); whole skill directories add new ones | +| **Rules** | `rules/*.md` | New rules are additive; same filename as a core rule → modifies the built-in rule | +| **Hooks** | `hooks/*.sh` + `hooks/hooks.json` | Scripts copied as-is; `hooks.json` registers additional hooks alongside Pilot's core hooks | +| **Agents** | `agents/*.md` | Add new agents alongside Pilot's built-ins (e.g. plug extra reviewers into the spec workflow) | +| **Top-level config** | `settings.json`, `claude.json`, `.mcp.json`, `.lsp.json` in the repo root | Modify the auto-applied Claude settings, app config, MCP server list, and LSP server list — see [Overriding top-level config](#overriding-top-level-config) below | + +## File structure + +The structure is the same whether you publish it as a git repo or keep it as a local folder. 
Directory names map 1:1 to `~/.claude/`: + +``` +my-customization/ +├── customization.json # Required: metadata + optional skill overlays +├── settings.json # Optional: deep-merges into ~/.claude/settings.json +├── claude.json # Optional: deep-merges into ~/.claude.json +├── .mcp.json # Optional: replaces ~/.claude/pilot/.mcp.json +├── .lsp.json # Optional: replaces ~/.claude/pilot/.lsp.json +├── skills/ # → ~/.claude/skills/ +│ ├── spec-plan/steps/ +│ │ └── security-review.md # New step injected into spec-plan +│ └── team-deploy/ # Brand-new skill +│ ├── manifest.json +│ ├── orchestrator.md +│ └── steps/01-stage.md +├── rules/ # → ~/.claude/rules/ +│ ├── team-standards.md # Additive +│ └── testing.md # Overrides core (same filename) +├── hooks/ # → ~/.claude/pilot/hooks/ +│ ├── team-lint-check.sh +│ └── hooks.json # Registers team-lint-check.sh (see below) +└── agents/ # → ~/.claude/pilot/agents/ + └── team-reviewer.md +``` + +Only ship the files and directories you need. A repo with just `rules/` is a valid customization; so is one with just `.mcp.json`. + +## Install and manage + +```bash +pilot customize install <source> # Install and apply +pilot customize update # Re-apply (pulls git source; reads local source in place) +pilot customize status # Active source, file counts, drift +pilot customize diff <skill>/<step-id> # Unified diff vs upstream +pilot customize remove # Restore pristine state +``` + +`<source>` is either a git URL (`https://...`, `git@...`, `ssh://...`) or a local directory path (`/path/to/folder`, `~/my-patch`). Git sources are cloned into `~/.pilot/cache/customization/`; local sources are read in place on every apply — no cache, edits take effect immediately on `pilot customize update`. + +`install` accepts `--branch <name>` (git only), `--subfolder <path>`, and `--json`. Every command is transactional: invalid overlay references fail fast before any file is written. 
+ +## Skill overlays + +Each Pilot workflow skill ships as `orchestrator.md` + `manifest.json` + step fragments. Your `customization.json` declares how your fragments compose with upstream: + +```json +{ + "name": "Acme Team", + "version": "1.0.0", + "schemaVersion": 2, + "pilotVersionMin": "2.6.0", + "overlays": { + "<skill-name>": { + "insert_after": { "<anchor-id>": [{ "id": "my-step", "file": "steps/my-step.md" }] }, + "insert_before": { "<anchor-id>": [{ "id": "my-step", "file": "steps/my-step.md" }] }, + "replace": { "<step-id>": { "file": "steps/my-replacement.md" } }, + "disable": [ "<step-id>" ] + } + } +} +``` + +Overlays run per skill in order: `disable` → `replace` → `insert_before` → `insert_after`. On install, Pilot applies them in memory and regenerates `SKILL.md` via atomic swap — upstream files are never mutated, and `pilot customize remove` restores byte-identical pristine state. + +**Omit `overlays` entirely** if you're only shipping rules, hooks, or agents. + +**Finding fragment IDs** to reference: + +```bash +cat ~/.claude/skills/<skill-name>/manifest.json | jq '.steps[].id' +``` + +IDs are stable across Pilot versions — upstream can rename files or edit prose, the IDs stay constant. + +## Registering hooks + +A hook script at `hooks/team-lint-check.sh` is copied to `~/.claude/pilot/hooks/`, but Claude Code only runs it if it's registered in `hooks.json`. Pilot ships its own `hooks.json` with the core hooks; to add yours, ship a `hooks/hooks.json` in your repo that includes both. + +1. Copy `~/.claude/pilot/hooks/hooks.json` into your repo at `hooks/hooks.json` — this is your baseline +2. Append your hook entries (under `PostToolUse`, `SessionStart`, etc.) +3. Keep the existing Pilot entries — your file replaces Pilot's, so anything you omit is lost + +`pilot customize update` re-copies your `hooks.json` whenever you pull team changes, keeping your hooks registered. 
+ +## Drift detection (optional) + +When you `replace` an upstream step, upstream may improve that same step later. Pilot will warn you if you opt in with a `pinned_hash`: + +```json +"replace": { + "step-1.6-write-plan": { + "file": "steps/my-write-plan.md", + "pinned_hash": "<hash from hashes.json>" + } +} +``` + +Grab the hash from `~/.claude/skills/<skill>/hashes.json` — Pilot writes it on every install. + +- `pilot customize status` compares your pinned hash to current upstream. Differs → warning. +- `pilot customize diff <skill>/<step-id>` shows what changed so you can port the improvement. +- Once you've updated your replacement (or chosen to ignore the upstream change), re-pin the hash and commit. + +**Skip it** if you don't care about tracking upstream changes. `customize status` still flags overlay IDs that no longer exist upstream — those warnings appear regardless of `pinned_hash`. + +## End-to-end example + +```bash +mkdir -p my-customization/skills/<skill>/steps && cd my-customization +git init +``` + +Create `skills/<skill>/steps/security-review.md` with your step content, then `customization.json`: + +```json +{ + "name": "Acme Team", + "version": "0.1.0", + "schemaVersion": 2, + "overlays": { + "<skill>": { + "insert_after": { + "<anchor-id>": [{ "id": "acme-security-review", "file": "steps/security-review.md" }] + } + } + } +} +``` + +Install — either from a local folder (for yourself) or from a git repo (for your team): + +```bash +# Option A: local path (personal use) +pilot customize install ~/my-customization + +# Option B: git repo (team-wide) +git add -A && git commit -m "initial customization" && git push +pilot customize install https://github.com/your-org/my-customization.git +``` + +Your step now appears in `~/.claude/skills/<skill>/SKILL.md` right after the anchor. + +## Overriding top-level config + +Four Pilot config files can be overridden from your repo root.
They use different strategies based on what each file contains: + +| File in repo | Destination | Strategy | Notes | +|--------------|-------------|----------|-------| +| `settings.json` | `~/.claude/settings.json` | Deep-merge | Pack keys win; the launcher still re-injects the `model` field and env vars on every startup. | +| `claude.json` | `~/.claude.json` | Deep-merge | Preserves OAuth account, project history, and caches — pack only overrides the keys it sets. | +| `.mcp.json` | `~/.claude/pilot/.mcp.json` | Full copy | Pilot-owned; safe to replace. | +| `.lsp.json` | `~/.claude/pilot/.lsp.json` | Full copy | Pilot-owned; safe to replace. | + +**Deep-merge semantics:** nested objects merge recursively (pack values replace specific keys), while arrays are replaced wholesale. If you want to add an entry to an array, your pack's file must include every item you want the final array to contain. + +**When Pilot updates itself** (installer re-runs on version change, or you re-run `install.sh`): Pilot's baseline is re-applied, then your pack overlay re-applies on top. Pack values survive as "user customizations" through the three-way merge. + +**On `pilot customize remove`:** `.mcp.json` and `.lsp.json` are deleted — re-run `install.sh` or the installer to restore Pilot defaults. `settings.json` and `claude.json` are left in place because they contain user state (OAuth session, project history) — merged pack values stay until you edit them out manually. This is an intentional safety measure — removing a pack should never wipe user data. + +## See Also + +- **[Extensions](/docs/features/extensions)** — per-project skills, rules, commands, and agents managed in the Console. Extensions are scoped to a single project; customization is a team-wide overlay that applies across every project via `pilot customize install`.
diff --git a/docs/docusaurus/docs/features/extensions.md b/docs/docusaurus/docs/features/extensions.md index df27b725..cb2ca5cb 100644 --- a/docs/docusaurus/docs/features/extensions.md +++ b/docs/docusaurus/docs/features/extensions.md @@ -55,18 +55,12 @@ Authentication uses your existing system git credentials (SSH keys or credential After connecting, a **Remote** scope filter button appears in the filter bar. Select it to see all extensions available in the team repository. Each remote extension shows its file path in the repository. -### Push and Pull +### Push, Pull, and Diff -- **Push to Remote** — From any local extension's detail modal, click the **Push** button to upload it to the team repository +- **Push to Remote** — From any local extension's detail modal, click **Push** to upload it to the team repository - **Download from Remote** — Click any remote extension and choose **Download to Global** or **Download to Project** -- **Conflict detection** — If a local file differs from the remote version, Pilot shows a diff and lets you choose whether to overwrite - -### Comparing Versions - -When an extension exists both locally and in the remote, the detail modal shows a **Remote** compare button. This opens a side-by-side diff view (same as the project vs global diff) with options to sync in either direction: - -- **Use Remote → Local** — overwrite your local version with the remote -- **Use Local → Remote** — push your local version to the remote +- **Diff and sync** — When an extension exists both locally and remotely, click the **Remote** compare button in the detail modal. 
A side-by-side diff view opens with sync options: **Use Remote → Local** or **Use Local → Remote** +- **Conflict detection** — If a push would overwrite a differing remote version, Pilot shows the diff and lets you choose whether to overwrite it ### APM Format (Cross-Tool Compatibility) @@ -173,3 +167,7 @@ Create extensions manually or via Claude Code commands: ├── commands/ ← plugin commands └── agents/ ← plugin agents ``` + +## See Also + +- **[Customization](/docs/features/customization)** — for team-wide workflow modification. Extensions are project-scoped; customization is an overlay (installed from a git repo or a local folder) that applies across every project via `pilot customize install`. It composes custom steps into core workflow skills and adds team rules, hooks, and agents. Available on Team and Enterprise plans. diff --git a/docs/docusaurus/docs/features/hooks.md b/docs/docusaurus/docs/features/hooks.md index b123a6a1..63ebdce8 100644 --- a/docs/docusaurus/docs/features/hooks.md +++ b/docs/docusaurus/docs/features/hooks.md @@ -16,10 +16,10 @@ Blocking hooks reject actions or force fixes.
Non-blocking hooks warn without in ## PreToolUse -*Before Bash, search, web, or agent tools* +*Before Bash, search, web, or agent tools run* | Hook | Type | Description | |------|------|-------------| -| `tool_redirect.py` | Blocking | Redirects to MCP alternatives, blocks Explore agent and plan mode conflicts | +| `tool_redirect.py` | Blocking | Redirects to MCP alternatives, blocks unsupported web fetch paths, and enforces `/spec`-compatible tool usage | | `tool_token_saver.py` | Blocking | Rewrites Bash commands via RTK for token savings (60-90% reduction) | ## PostToolUse -*After file edits, searches, and other tool calls* +*After file edits, reads, searches, and task tools* | Hook | Type | Description | |------|------|-------------| @@ -77,5 +77,5 @@ Additionally, `spec_plan_validator.py` and `spec_verify_validator.py` run as com | `session_end.py` | Blocking | Stops worker daemon if no other sessions active, sends dashboard notification | :::info Closed loop -When compaction fires, **PreCompact** captures your active plan and task list to persistent memory. **SessionStart** restores everything afterward — no progress lost. +When compaction fires, **PreCompact** snapshots your active plan and task list to persistent memory. **SessionStart** restores the working state afterward through the worker service and `post_compact_restore.py` so progress survives compaction. ::: diff --git a/docs/docusaurus/docs/features/mcp-servers.md b/docs/docusaurus/docs/features/mcp-servers.md index fa3bbc52..f1f9469e 100644 --- a/docs/docusaurus/docs/features/mcp-servers.md +++ b/docs/docusaurus/docs/features/mcp-servers.md @@ -8,7 +8,62 @@ description: External context always available to every session External context always available to every session. -Six MCP servers are pre-configured and always available. They're lazy-loaded via `ToolSearch` to keep context lean — discovered and called on demand. Add your own in `.mcp.json`, then run `/setup-rules` to generate documentation. 
+Six MCP servers are pre-configured in `.mcp.json` and lazy-loaded via `ToolSearch` to keep context lean. Pilot also installs the `context-mode` and `chrome-devtools-mcp` Claude plugins alongside them. Add your own MCP entries in `.mcp.json`, then run `/setup-rules` to generate documentation. + +## context-mode plugin + +**Context window protection — sandbox execution and FTS5 knowledge base** + +Keeps large outputs out of your context window. Commands that produce more than ~20 lines of output are routed to a sandboxed executor — only your printed summary enters context. An FTS5 knowledge base indexes content for later search. This ships via the Claude plugin system, not as an entry inside `.mcp.json`. Integrated via [context-mode](https://github.com/mksglu/context-mode). + +``` +ctx_batch_execute(commands: [...], queries: ["find errors"]) +ctx_execute(language: "javascript", code: "const r = await fetch(...)") +ctx_execute_file(path: "data.json", language: "javascript", code: "...") +ctx_search(queries: ["auth flow", "login endpoint"]) +``` + +**Key capabilities:** + +| Tool | Use case | +|------|----------| +| `ctx_batch_execute` | Run multiple commands + search in one call — replaces 30+ individual tool calls | +| `ctx_execute` | Run code in sandbox (JS, Python, shell) — only stdout enters context | +| `ctx_execute_file` | Process a file in sandbox — file content never enters context | +| `ctx_search` | Query the FTS5 knowledge base with multiple queries in one call | +| `ctx_index` | Store content in the knowledge base for later search | + +**Routing hooks** automatically intercept tools that produce large output (Bash, Read, Grep, WebFetch) and suggest context-mode alternatives. curl/wget and WebFetch are blocked entirely — use dedicated web-fetch and web-search MCP servers instead. + +## chrome-devtools-mcp plugin + +**Browser automation via Chrome DevTools Protocol** + +Enterprise-friendly fallback when the Claude Code Chrome extension can't be installed. 
Connects directly to Chrome via CDP — no extension needed. Also provides Lighthouse audits, performance tracing, and device emulation that other browser tools lack. Integrated via [chrome-devtools-mcp](https://github.com/anthropics/chrome-devtools-mcp). + +``` +list_pages() +navigate_page(type="url", url="http://localhost:3000") +take_snapshot() // a11y tree with uid refs +click(uid="1_8") +lighthouse_audit(device="desktop") +performance_start_trace(autoStop=true, reload=true) +``` + +**Key capabilities:** + +| Tool | Use case | +|------|----------| +| `take_snapshot` | A11y tree with uid refs for clicking, filling, hovering | +| `take_screenshot` | Visual capture of viewport or specific element | +| `evaluate_script` | Run JavaScript in the page context | +| `lighthouse_audit` | Accessibility, SEO, and best practices scores | +| `performance_start_trace` | Core Web Vitals (LCP, CLS), performance insights | +| `emulate` | Device viewport, mobile/touch, color scheme, CPU throttling | +| `list_network_requests` | Inspect all network traffic with headers and bodies | +| `list_console_messages` | Read console output filtered by type (error, warn, log) | + +**4-tier browser priority:** Claude Code Chrome → Chrome DevTools MCP → playwright-cli → agent-browser. See the `browser-automation.md` rule for detection and fallback logic. ## context7 diff --git a/docs/docusaurus/docs/features/model-routing.md b/docs/docusaurus/docs/features/model-routing.md index f3000363..00455aff 100644 --- a/docs/docusaurus/docs/features/model-routing.md +++ b/docs/docusaurus/docs/features/model-routing.md @@ -27,5 +27,20 @@ Pilot automatically routes each phase to the right model. Rather than always usi - The result: better output at lower cost than running Opus everywhere :::tip Fully configurable -Configure via the Pilot Shell Console Settings tab (`localhost:41777/#/settings`). Choose between Sonnet 4.6 and Opus 4.6 for the main session, each command, and each sub-agent independently. 
Context window size (200K or 1M) is configurable via the Extended Context toggle. API subscribers (Team, Enterprise) get 1M at no additional cost with all models. Max plan users must set all models to Opus — Sonnet 1M is not included in Max. +Configure via the Pilot Shell Console Settings tab (`localhost:41777/#/settings`). Choose between Sonnet 4.6 and Opus 4.7 for the main session, each command, and each sub-agent independently. Context window size (200K or 1M) is configurable via the Extended Context toggle. API subscribers (Team, Enterprise) get 1M at no additional cost with all models. Max plan users must set all models to Opus — Sonnet 1M is not included in Max. ::: + +## Pinning a Legacy or Specific Model Version + +The model dropdown in Console Settings includes a **Custom…** option that lets you enter an explicit Anthropic model ID instead of a Claude Code alias. This is useful when: + +- You want to pin a specific historical version (e.g. `claude-opus-4-6`, `claude-opus-4-5`, `claude-sonnet-4-5-20250929`) for reproducibility. +- A newer release trips content filters on code that previous releases handled, and you need a reliable fallback while the issue is reported. +- You are standardizing across a team and want every machine on the exact same model ID. + +Accepted values: + +- Any alias supported by Claude Code — currently `sonnet` and `opus`. +- Any explicit Anthropic model ID matching `claude-<suffix>` (e.g. `claude-opus-4-6`, `claude-haiku-4-5`). + +The Extended Context (`1M`) toggle only applies to the `sonnet` and `opus` aliases — explicit model IDs are passed through to Claude Code exactly as entered, so pick the concrete ID for the context window you want. 
diff --git a/docs/docusaurus/docs/features/open-source-tools.md b/docs/docusaurus/docs/features/open-source-tools.md index 9dc4b00b..46d572d6 100644 --- a/docs/docusaurus/docs/features/open-source-tools.md +++ b/docs/docusaurus/docs/features/open-source-tools.md @@ -50,7 +50,6 @@ Pilot Shell installs the following open-source tools during setup. Each tool is | [Probe](https://github.com/probelabs/probe) | Semantic code search engine | ISC | | [RTK](https://github.com/rtk-ai/rtk) | Token-optimized CLI proxy (60-90% savings) | MIT | | [CodeGraph](https://github.com/colbymchenry/codegraph) | Code knowledge graph and structural analysis | MIT | -| [ccusage](https://github.com/ryoppippi/ccusage) | Claude Code usage analytics | MIT | ## Plugin Runtime Dependencies @@ -60,6 +59,8 @@ Pilot Shell installs the following open-source tools during setup. Each tool is |------|---------|---------| | [Transformers.js](https://github.com/xenova/transformers.js) | Local ML model inference for embeddings | Apache-2.0 | | [sharp](https://github.com/lovell/sharp) | High-performance image processing | Apache-2.0 | +| [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) | SQLite bindings for context-mode FTS5 and session DB | MIT | +| [Turndown](https://github.com/mixmark-io/turndown) | HTML-to-markdown conversion for context-mode web fetching | MIT | ## Testing Tools @@ -67,6 +68,7 @@ Pilot Shell installs the following open-source tools during setup. 
Each tool is | Tool | Purpose | License | |------|---------|---------| +| [Chrome DevTools MCP](https://github.com/anthropics/chrome-devtools-mcp) | Browser automation via Chrome DevTools Protocol — Lighthouse, performance tracing, device emulation | Apache-2.0 | | [playwright-cli](https://github.com/microsoft/playwright-cli) | Browser automation — persistent sessions, tracing, network mocking | Apache-2.0 | | [agent-browser](https://github.com/vercel-labs/agent-browser) | Browser automation — lightweight, fast startup | Apache-2.0 | | [Chrome for Testing](https://developer.chrome.com/blog/chrome-for-testing/) | Headless browser engine (via playwright-cli / agent-browser) | BSD-3-Clause | @@ -79,6 +81,7 @@ Pilot Shell installs the following open-source tools during setup. Each tool is | Tool | Purpose | License | |------|---------|---------| +| [context-mode](https://github.com/mksglu/context-mode) | Sandbox execution + FTS5 knowledge base — keeps large outputs out of context | ELv2 | | [Context7](https://github.com/upstash/context7) | Library documentation lookup | MIT | | [open-websearch](https://github.com/Aas-ee/open-webSearch) | Web search (multi-engine, no API key) | MIT | | [fetcher-mcp](https://github.com/jae-jae/fetcher-mcp) | Web page fetching via Playwright | MIT | diff --git a/docs/docusaurus/docs/features/remote-control.md b/docs/docusaurus/docs/features/remote-control.md index 96cfc189..5fdafa2b 100644 --- a/docs/docusaurus/docs/features/remote-control.md +++ b/docs/docusaurus/docs/features/remote-control.md @@ -12,7 +12,7 @@ Start a `/spec` task at your desk, then monitor and steer it from the couch. You ## Prerequisites -[Remote Control](https://youtu.be/Ko7_tC1fMMM?si=kWDzYiQvxlkZTrRK) requires the **native install** of Claude Code, not the npm version. If you have the npm version installed, uninstall it first: +Remote Control requires the **native install** of Claude Code, not the npm version. 
If you have the npm version installed, uninstall it first: ```bash npm uninstall -g @anthropic-ai/claude-code # Remove npm version if installed @@ -29,7 +29,7 @@ You also need a **Pro, Max, Team, or Enterprise** Claude subscription. API keys pilot ``` -Start a Pilot Shell session as usual. This loads all hooks, rules, MCP servers, and project configuration. +Loads all hooks, rules, MCP servers, and project configuration. ### 2. Activate Remote Control @@ -40,11 +40,11 @@ Start a Pilot Shell session as usual. This loads all hooks, rules, MCP servers, ### 3. Connect from your phone -Open the **Claude Mobile App** ([iOS](https://apps.apple.com/app/claude-by-anthropic/id6473753684) / [Android](https://play.google.com/store/apps/details?id=com.anthropic.claude)) and go to the **Code** tab. Your Pilot Shell session appears there with a green status dot when online. +Open the **Claude Mobile App** ([iOS](https://apps.apple.com/app/claude-by-anthropic/id6473753684) / [Android](https://play.google.com/store/apps/details?id=com.anthropic.claude)) and go to the **Code** tab. Your session appears there with a green status dot when online. You can also connect from any browser at [claude.ai/code](https://claude.ai/code). -## How It Works +## How it works Sessions started via `pilot` carry over all rules, hooks, MCP servers, and project configuration. The Claude App and web interface are just a window into your local session — your machine does all the work. @@ -53,43 +53,29 @@ Sessions started via `pilot` carry over all rules, hooks, MCP servers, and proje - **Multi-device sync** — send messages from terminal, browser, and phone interchangeably - **Auto-reconnect** — reconnects automatically when your laptop wakes from sleep -## Keeping Your Computer Awake +## Starting sessions from your phone via SSH -Your computer must stay awake for the Remote Control connection to remain active. 
On macOS, use [Amphetamine](https://apps.apple.com/de/app/amphetamine/id937984704) to keep your Mac awake with the display off — this way you can walk away without the session disconnecting. +The setup above assumes you start sessions via `pilot` on your computer first. To start new sessions from your phone instead: -## Start Sessions via SSH From Your Phone - -The above approach assumes you start sessions via `pilot` on your computer first. To also start new Pilot Shell sessions from your phone: - -1. Install [Termius](https://termius.com/) on your **mobile phone** (not your computer) -2. Connect via SSH to your computer +1. Install [Termius](https://termius.com/) on your phone (not your computer) +2. SSH into your computer 3. Run `pilot` in any project directory -### macOS Sleep Support - -Turn on **Remote Login** in macOS Settings → General → Sharing → Advanced → Remote Login. This lets you SSH into your Mac even while it's sleeping. - -### Outside Your Home Network - -The Claude App approach works everywhere out of the box — no extra setup needed. +## Keeping your computer reachable -For the SSH/Termius approach, you need network connectivity to your computer. Install [Tailscale](https://tailscale.com/) on both your computer and phone to create a VPN tunnel that works from anywhere. +For the Claude App or browser to stay connected, and for SSH to work when you're away from the keyboard, your computer needs to stay awake and — in the SSH case — accept connections while sleeping. 
-## Use Cases - -| Pattern | Description | -| ------- | ----------- | -| **Walk away** | Start a `/spec` task at your desk, monitor progress from your phone | -| **Couch review** | Queue up code reviews at your workstation, approve from the couch | -| **Quick check** | Glance at a running session from your phone without going back to your desk | -| **Multi-device** | Heavy coding from terminal, lighter interactions from browser, quick approvals from phone | +| Scenario | What you need | +|----------|---------------| +| App / browser approach | Keep the Mac awake — [Amphetamine](https://apps.apple.com/de/app/amphetamine/id937984704) keeps it awake with the display off | +| SSH approach | **System Settings → General → Sharing → Advanced → Remote Login** lets you SSH into your Mac while it's sleeping | +| Away from home network (SSH approach only) | Install [Tailscale](https://tailscale.com/) on both devices for a VPN tunnel that works anywhere — the Claude App works everywhere out of the box, so Tailscale is only needed for SSH | ## Channels — Telegram, Discord & iMessage -[Channels](https://code.claude.com/docs/en/channels) push messages from external platforms directly into your running Pilot session. Claude reads the message, acts on it with your full local environment, and replies back through the same platform. +[Channels](https://code.claude.com/docs/en/channels) push messages from external platforms directly into your running Pilot session. Claude reads the message, acts on it with your full local environment, and replies through the same platform. 
```bash -# Start Pilot with a channel enabled pilot --channels plugin:telegram@claude-plugins-official pilot --channels plugin:discord@claude-plugins-official pilot --channels plugin:imessage@claude-plugins-official @@ -103,10 +89,19 @@ Channels require [Bun](https://bun.sh/) and a one-time bot setup (Telegram/Disco | **Discord** | [Create a bot](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/discord), pass token during install | Send any message to the bot → approve pairing code in terminal | | **iMessage** | [macOS only](https://github.com/anthropics/claude-plugins-official/tree/main/external_plugins/imessage), no token needed | Texting yourself works automatically | -**Channels vs Remote Control:** Remote Control gives you a window into your session from the Claude App or browser. Channels let external platforms push events *into* your session — they're complementary. Use both together: channels for incoming messages, Remote Control for monitoring and steering. +**Channels vs Remote Control:** Remote Control gives you a window *into* your session from the Claude App or browser. Channels let external platforms push events *into* your session — they're complementary. Use both: channels for incoming messages, Remote Control for monitoring and steering. **Team/Enterprise:** Channels are off by default. Admins enable them via [claude.ai Admin settings](https://claude.ai/admin-settings/claude-code). See the full [Channels documentation](https://code.claude.com/docs/en/channels) and [Channels reference](https://code.claude.com/docs/en/channels-reference) for building custom channels. 
+## Use cases + +| Pattern | Description | +| ------- | ----------- | +| **Walk away** | Start a `/spec` task at your desk, monitor progress from your phone | +| **Couch review** | Queue up code reviews at your workstation, approve from the couch | +| **Quick check** | Glance at a running session from your phone without going back to your desk | +| **Multi-device** | Heavy coding from terminal, lighter interactions from browser, quick approvals from phone | + ## Troubleshooting If Remote Control doesn't connect or shows authentication errors, run `/logout` followed by `/login` inside Claude Code. This re-authenticates your session and resolves most connection issues. diff --git a/docs/docusaurus/docs/features/rules.md b/docs/docusaurus/docs/features/rules.md index 72e4b0d5..fe5b7a88 100644 --- a/docs/docusaurus/docs/features/rules.md +++ b/docs/docusaurus/docs/features/rules.md @@ -8,7 +8,7 @@ description: Production-tested best practices loaded into every session Production-tested best practices loaded into every session. -Rules load automatically at session start — they're enforced standards, not suggestions. Coding standards load conditionally by file type to keep context lean. Your project-level rules in `.claude/rules/` take precedence over Pilot's built-ins. +Rules load automatically at session start — they're enforced standards, not suggestions. Pilot ships 11 built-in rules plus 5 coding standards. Coding standards load conditionally by file type to keep context lean. Your project-level rules in `.claude/rules/` take precedence over Pilot's built-ins. 
## Built-in Rule Categories @@ -18,16 +18,18 @@ Rules load automatically at session start — they're enforced standards, not su - `testing.md` — TDD workflow, test strategy, coverage requirements (≥80%) - `verification.md` — Execution verification, completion requirements -### Development Practices (3 rules) +### Development Practices (4 rules) - `development-practices.md` — Project policies, systematic debugging, git rules - `context-management.md` — Context optimization and compaction resilience - `code-review-reception.md` — How to receive and act on code review feedback +- `documentation-sync.md` — Update affected docs (README, API docs, CLAUDE.md, AGENTS.md) in the same change as the code -### Tools (3 rules) +### Tooling & Context (4 rules) - `cli-tools.md` — Pilot CLI, Probe code search, RTK token optimization -- `browser-automation.md` — Browser automation for E2E UI testing (Chrome → playwright-cli → agent-browser) +- `browser-automation.md` — Browser automation for E2E UI testing (Chrome → Chrome DevTools MCP → playwright-cli → agent-browser) +- `context-mode.md` — Mandatory routing rules for large-output analysis and sandbox execution - `mcp-servers.md` — MCP server reference and tool selection guidance ## Coding Standards — Activated by File Type diff --git a/docs/docusaurus/docs/features/statusline.md b/docs/docusaurus/docs/features/statusline.md index f115d9f5..80440881 100644 --- a/docs/docusaurus/docs/features/statusline.md +++ b/docs/docusaurus/docs/features/statusline.md @@ -14,31 +14,53 @@ Pilot Shell replaces the default Claude Code status line with a rich, three-line The status line has three lines: +**Subscription users** (Pro / Max — Claude Code emits `rate_limits` on stdin): ``` -Line 1: Opus 4.6 [1M] | █████░▓ 60% [604K] | +156 -23 | main +2 ~3 | $1.45 | Savings: 65% +Line 1: Opus 4.7 [1M] | █████░▓ 60% | 5h: 42% ⇡ 2h | 7d: 18% ⇣ 4d | Savings: 65% Line 2: Spec: my-feature feature [implement] ████░░░░ 3/6 -Line 3: Pilot 8.2.1 (Solo) · CC 
2.1.79 (Max) · sessions 2 · memories 12 +Line 3: Pilot 8.4.0 (Solo) · CC 2.1.80 (Max) · sessions 2 · memories 12 ``` +**API / Enterprise users** (no `rate_limits` in stdin): +``` +Line 1: Opus 4.7 [1M] | █████░▓ 60% | +156 -23 | main +2 ~3 | $1.45 | Savings: 65% +Line 2: Spec: my-feature feature [implement] ████░░░░ 3/6 +Line 3: Pilot 8.4.0 (Solo) · CC 2.1.80 · sessions 2 · memories 12 +``` + +The layout is symmetric: slots 3 and 4 swap between `5h | 7d` and `lines | git` based on what Claude Code provides on stdin. Cost is shown only for API / Enterprise users — on Pro / Max the subscription covers usage, so a dollar figure is noise and is suppressed. Savings always anchors the right side. + ### Line 1 — Session Metrics Widgets separated by `|`, from left to right: | Widget | Description | Color coding | |--------|-------------|--------------| -| **Model** | Active model in short form (`Opus 4.6 [1M]`, `Sonnet 4.6`) | Cyan | -| **Context** | Effective context usage with progress bar, buffer indicator (`▓`), and current token count (e.g., `[604K]`) | Green < 80%, Yellow 80–95%, Red 95%+ | -| **Lines changed** | Session lines added/removed (`+156 -23`). Hidden when usage API data is available | Green for added, Red for removed | -| **Git** | Branch name with staged (`+N`) and unstaged (`~N`) counts. 
Shows worktree branch with `wt` suffix when in a spec worktree | Magenta branch, Green staged, Yellow unstaged | -| **Cost** | Session cost in USD | Green < $1, Yellow $1–5, Red $5+ | -| **5h usage** | 5-hour usage limit percentage with reset time (requires OAuth credentials) | Green < 70%, Yellow 70–90%, Red 90%+ | -| **7d usage** | Weekly usage limit percentage with reset time | Same as 5h | -| **RTK savings** | Token savings percentage from RTK proxy (`Savings: N%`), shown when no usage data available | Cyan | - -:::info Usage API -When OAuth credentials are present (`~/.claude/.credentials.json`), the Anthropic usage API provides 5-hour and weekly usage limits — these replace the lines-changed and RTK widgets. Without credentials, lines-changed and RTK savings are shown instead. This is credential-dependent, not platform-dependent. +| **Model** | Active model in short form (`Opus 4.7 [1M]`, `Sonnet 4.6`). Legacy / pinned IDs such as `claude-opus-4-6`, `claude-sonnet-4-5-20250929`, or retired `claude-3-*` variants resolve to friendly labels (`Opus 4.6`, `Sonnet 4.5`, `Sonnet 3.7`, …). Unknown IDs display verbatim. | Cyan | +| **Context** | Effective context usage with progress bar and buffer indicator (`▓`). The session percentage alone is sufficient — no raw token count is shown. | Green < 80%, Yellow 80–95%, Red 95%+ | +| **Lines changed** | Session lines added/removed (`+156 -23`). Hidden when `rate_limits` is present. | Green for added, Red for removed | +| **Git** | Branch name with staged (`+N`) and unstaged (`~N`) counts. Shows worktree branch with `wt` suffix when in a spec worktree. Hidden when `rate_limits` is present. | Magenta branch, Green staged, Yellow unstaged | +| **Cost** | Session cost in USD. Hidden when `rate_limits` is present — on Pro / Max the subscription covers API usage, so the dollar figure is noise. | Green < $1, Yellow $1–5, Red $5+ | +| **5h usage** | 5-hour usage percentage with pacing arrow and reset countdown (`5h: 42% ⇡ 2h`). 
See pacing rules below. Only shown when `rate_limits` is available. | Green < 70%, Yellow 70–90%, Red 90%+ | +| **7d usage** | 7-day usage percentage with pacing arrow and reset countdown (`7d: 18% ⇣ 4d`). Only shown when `rate_limits` is available. | Same as 5h | +| **Savings** | Token savings percentage from RTK proxy (`Savings: N%`). Always shown when RTK has data, regardless of whether usage info is present. | Cyan | + +:::info Usage Limits — Cross-Platform +Claude Code 2.1.80+ emits `rate_limits` directly on stdin for **subscription plans** (Pro / Max). Pilot reads these values with no network calls, no OAuth credentials, and no platform restrictions — it works identically on macOS, Linux, and Windows. + +**API / Enterprise users** do not receive `rate_limits`, so the status line shows lines-changed, git branch, and session cost in place of the 5h / 7d usage widgets; RTK savings appear in both layouts. No configuration needed — the display adapts automatically to whatever data Claude Code provides. ::: +#### Pacing Arrow + +When a usage widget is shown, Pilot compares the used percentage to the *expected* percentage based on how much of the window has elapsed: + +- **⇡** (red) — burning quota **faster** than the clock. On track to hit the limit before reset. +- **⇣** (green) — burning quota **slower** than the clock. Surplus budget available. +- *(no arrow)* — within ±3 percentage points of linear pace. On schedule. + +Example: 150 minutes into the 5-hour window (half elapsed), used is 90% → `5h: 90% ⇡ 2h 30m` — clearly over pace and heading for the limit. Used 5% in the same situation → `5h: 5% ⇣ 2h 30m` — well under pace. 
+ ### Line 2 — Mode **Quick Mode** (no active spec): @@ -63,7 +85,7 @@ Spec: my-feature feature [implement] ████░░░░ 3/6 iter:2 ### Line 3 — Version & Session Info ``` -Pilot 8.2.1 (Solo) · CC 2.1.79 (Max) · sessions 2 · memories 12 +Pilot 8.4.0 (Solo) · CC 2.1.80 (Max) · sessions 2 · memories 12 ``` | Field | Description | diff --git a/docs/docusaurus/docs/getting-started/installation.md b/docs/docusaurus/docs/getting-started/installation.md index 3e673376..a6a994a4 100644 --- a/docs/docusaurus/docs/getting-started/installation.md +++ b/docs/docusaurus/docs/getting-started/installation.md @@ -25,35 +25,41 @@ Run from any directory — it installs globally to `~/.pilot/` and `~/.claude/`. | 1 | Prerequisites | Checks/installs Homebrew, Node.js, Python 3.12+, uv, git, jq | | 2 | Claude files | Sets up `~/.claude/` plugin — rules, commands, hooks, MCP servers | | 3 | Config files | Creates `.nvmrc` and project config | -| 4 | Dependencies | Installs Probe, RTK, CodeGraph, playwright-cli, agent-browser, language servers | +| 4 | Dependencies | Installs Probe, RTK, CodeGraph, context-mode (better-sqlite3), Chrome DevTools MCP, playwright-cli, agent-browser, language servers | | 5 | Shell integration | Auto-configures bash, fish, and zsh with the `pilot` alias | | 6 | VS Code extensions | Installs recommended extensions for your language stack | | 7 | Finalize | Success message with next steps | -## Chrome Extension (Recommended) +## Browser Automation For the best browser automation and E2E testing experience, install the [Claude Code Chrome extension](https://code.claude.com/docs/en/chrome). It provides richer visual context and direct access to your existing browser sessions. -Pilot uses a 3-tier browser tool selection: **Chrome extension** (preferred) → **playwright-cli** (thorough E2E with persistent sessions, tracing, network mocking) → **agent-browser** (lightweight, fast startup). All three are installed automatically. 
In environments where Chrome can't be installed (dev containers, headless CI), Pilot falls back to the CLI tools automatically. +Pilot uses a 4-tier browser tool selection: **Chrome extension** (preferred) → **[Chrome DevTools MCP](https://github.com/anthropics/chrome-devtools-mcp)** (enterprise fallback via CDP — Lighthouse, performance tracing, device emulation) → **playwright-cli** (thorough E2E with persistent sessions, tracing, network mocking) → **agent-browser** (lightweight, fast startup). Chrome DevTools MCP, playwright-cli, and agent-browser are installed automatically; the Chrome extension is a separate manual install. In environments where the Chrome extension can't be installed (enterprise restrictions, dev containers), Pilot falls back to Chrome DevTools MCP first, then to CLI tools. -## Codex Plugin (Optional) +## Codex Plugin (Included) -For adversarial code review powered by OpenAI Codex, install the [Codex plugin](https://github.com/openai/codex-plugin-cc): +The [Codex plugin](https://github.com/openai/codex-plugin-cc) is installed automatically by the Pilot installer. To activate it: -```bash -claude plugin install @openai/codex -``` +1. Run `/codex:setup` in any Pilot session to authenticate with your OpenAI account +2. Enable the Codex reviewers in Console Settings → Reviewers -After installation, run `/codex:setup` and enable the Codex reviewers in Console Settings → Spec Workflow → Codex Reviewers. Pilot auto-detects the plugin — when enabled, Codex provides an independent second opinion during `/spec` planning and verification phases. A [ChatGPT Plus](https://chatgpt.com/#pricing) subscription ($20/mo) covers the Codex API usage needed for code reviews. +When enabled, Codex provides an independent adversarial review during `/spec` planning and verification phases. A [ChatGPT Plus](https://chatgpt.com/#pricing) subscription ($20/mo) covers the Codex API usage needed for code reviews. ## Dev Container Pilot Shell works inside Dev Containers. 
Copy the `.devcontainer` folder from the [Pilot Shell repository](https://github.com/maxritter/pilot-shell/tree/main/.devcontainer) into your project, adapt it to your needs (base image, extensions, dependencies), and run the installer inside the container. The installer auto-detects the container environment and skips system-level dependencies like Homebrew. +For tighter isolation when working with untrusted code, layer Claude Code's [`/sandbox`](https://code.claude.com/docs/en/sandboxing) on top — `bubblewrap`, `socat`, `iptables`, and `ipset` are pre-installed in the Dockerfile so it works out of the box on Linux. + +### Further reading + +- [Claude Code · Development containers](https://code.claude.com/docs/en/devcontainer) — Anthropic's reference container, persistent volumes, organization policy, network egress, the `--dangerously-skip-permissions` flag. +- [Claude Code · Sandboxing](https://code.claude.com/docs/en/sandboxing) — Seatbelt (macOS) and bubblewrap (Linux/WSL2), `/sandbox` modes, `allowedDomains`, filesystem allow/deny rules, security limitations. + ## Install Specific Version ```bash -export VERSION=7.5.7 +export VERSION=8.4.0 curl -fsSL https://raw.githubusercontent.com/maxritter/pilot-shell/main/install.sh | bash ``` diff --git a/docs/docusaurus/docs/getting-started/permission-modes.md b/docs/docusaurus/docs/getting-started/permission-modes.md index 921b7a3d..d23ae6dd 100644 --- a/docs/docusaurus/docs/getting-started/permission-modes.md +++ b/docs/docusaurus/docs/getting-started/permission-modes.md @@ -59,7 +59,7 @@ Claude Code's built-in plan mode (`Shift+Tab` → "plan") is unstructured — pl Pilot Shell passes `--enable-auto-mode` to Claude Code at launch, making Auto Mode available in the `Shift+Tab` permission cycle. Auto Mode lets Claude execute actions without showing permission prompts — a separate classifier model reviews each action before it runs and blocks anything that escalates beyond the task scope. 
:::warning Availability -Auto Mode requires a **Team or Enterprise plan**, or **API access**. It is **not available on Pro or Max plans**. An admin must enable it in [Claude Code admin settings](https://claude.ai/admin-settings/claude-code) before users can turn it on. It also requires **Claude Sonnet 4.6 or Opus 4.6** — older models and third-party providers (Bedrock, Vertex, Foundry) are not supported. +Auto Mode is available on **Max, Team, or Enterprise plans**, or with **API access**. It is **not available on the Pro plan**. On Team and Enterprise plans, an admin must enable it in [Claude Code admin settings](https://claude.ai/admin-settings/claude-code) before users can turn it on. It also requires **Claude Sonnet 4.6 or Opus 4.7** — older models and third-party providers (Bedrock, Vertex, Foundry) are not supported. ::: ### How the Classifier Works diff --git a/docs/docusaurus/docs/getting-started/prerequisites.md b/docs/docusaurus/docs/getting-started/prerequisites.md index 5a2a16b5..bc675a2b 100644 --- a/docs/docusaurus/docs/getting-started/prerequisites.md +++ b/docs/docusaurus/docs/getting-started/prerequisites.md @@ -23,15 +23,11 @@ Pilot enhances Claude Code — it doesn't replace it. You need an active Claude | **Team Premium** | Teams | 6.25x usage per member + SSO, admin tools, billing management | | **Enterprise** | Companies | For organizations with compliance, procurement, or security requirements | -## Optional: Codex Plugin +## Codex Plugin (Included) -Install the [Codex plugin](https://github.com/openai/codex-plugin-cc) for adversarial code review powered by OpenAI Codex. When enabled in Console Settings, Codex provides an independent second opinion during `/spec` planning and verification phases. +The [Codex plugin](https://github.com/openai/codex-plugin-cc) is installed automatically with Pilot. It provides adversarial code review powered by OpenAI Codex — an independent second opinion during `/spec` planning and verification phases. 
-```bash -claude plugin install @openai/codex -``` - -After installation, run `/codex:setup` and enable the Codex reviewers in Console Settings → Spec Workflow → Codex Reviewers. Pilot auto-detects the plugin — Codex reviewer cards appear grayed out until the plugin is installed, then become toggleable. +**Setup:** Run `/codex:setup` once to authenticate with your OpenAI account, then enable the reviewers in Console Settings → Reviewers. Pilot auto-detects the plugin — Codex reviewer toggles appear grayed out until setup is complete. A [ChatGPT Plus](https://chatgpt.com/#pricing) subscription ($20/mo) covers the Codex API usage needed for code reviews. diff --git a/docs/docusaurus/docs/intro.md b/docs/docusaurus/docs/intro.md index 3641eca6..2773bfb2 100644 --- a/docs/docusaurus/docs/intro.md +++ b/docs/docusaurus/docs/intro.md @@ -7,9 +7,18 @@ description: Complete technical reference for Pilot Shell # Pilot Shell Documentation -**Pilot Shell** is the professional development environment for Claude Code. It provides spec-driven development, persistent memory, quality hooks, reusable skills, and a modular rules system. +**Pilot Shell** is the Claude Code engineering platform. You get plans you can review before a single line is written, tests that are enforced — not optional, knowledge that persists across sessions, and quality gates that run automatically on every edit. -## Quick Start +No more re-explaining decisions, chasing skipped tests, or reviewing 15-file changes that were never scoped. Pilot adds the structure that turns fast AI output into reliable production code. 
+ +## Why Pilot Shell + +- **Reliable output** — every feature goes through plan → implement → verify, with TDD at each step +- **Persistent context** — architectural decisions, patterns, and project knowledge survive across sessions +- **Automatic quality** — linting, formatting, type checking, and test enforcement happen as hooks, not suggestions +- **Full visibility** — a local dashboard shows what's running, what changed, and what it cost + +## Quick start ```bash # Install @@ -24,31 +33,13 @@ cd your-project && pilot # Create a reusable skill > /create-skill -# Generate a PRD with optional research +# Brainstorm a vague idea into a PRD (with optional research) > /prd "Add real-time notifications for team updates" # Plan and build a feature > /spec "Add user authentication with OAuth" ``` -## What's Inside - -| Category | Highlights | -|----------|-----------| -| **[Getting Started](/docs/getting-started/prerequisites)** | Prerequisites, one-command installation | -| **[Workflows](/docs/workflows/setup-rules)** | `/setup-rules`, `/create-skill`, `/prd`, `/spec`, Quick Mode | -| **[Features](/docs/features/console)** | Pilot Console, statusline, model routing, rules, context optimization, remote control, hooks, extensions, Pilot CLI, MCP servers, language servers, open-source tools | - -## Key Commands - -| Command | Purpose | -|---------|---------| -| `pilot` or `ccp` | Start Claude with Pilot enhancements | -| `/setup-rules` | Generate project rules and MCP docs | -| `/prd "idea"` | Research → PRD → hand off to `/spec` | -| `/spec "task"` | Plan → Implement → Verify with TDD | -| `/create-skill` | Build a reusable skill from any topic | - ## Architecture Pilot enhances Claude Code with: @@ -59,3 +50,5 @@ Pilot enhances Claude Code with: - **Intelligent model routing** — Opus for planning, Sonnet for implementation - **Persistent memory** via local SQLite — decisions and context survive across sessions - **Pilot Console** — local web dashboard for monitoring, 
configuration, and skill sharing + +Explore the sidebar for [getting started](/docs/getting-started/prerequisites), [workflows](/docs/workflows/setup-rules), and [features](/docs/features/console). diff --git a/docs/docusaurus/docs/workflows/benchmark.md b/docs/docusaurus/docs/workflows/benchmark.md new file mode 100644 index 00000000..f5c85bc5 --- /dev/null +++ b/docs/docusaurus/docs/workflows/benchmark.md @@ -0,0 +1,153 @@ +--- +sidebar_position: 6 +title: /benchmark +description: Measure the real impact of rules and skills with quantitative before/after comparisons +--- + +# /benchmark + +Measure whether your rules and skills actually improve outputs — with numbers, not vibes. + +```bash +pilot +> /benchmark pilot/skills/create-skill +> /benchmark pilot/rules/testing.md +``` + +## When to use + +- You shipped a rule or skill and want evidence it helps +- You're iterating and need before/after feedback +- You want to catch regressions quantitatively + +## How it works + +`/benchmark` runs your prompts twice — **with** and **without** the target — grades both against falsifiable assertions, and shows the results inline. + +| Type | `with` | `without` | +|------|--------|-----------| +| `skill` | Skill installed | Empty `.claude/` | +| `rule` | Rule loaded | Empty `.claude/` | + +## Writing good assertions + +The only thing that matters: **baseline must plausibly fail**. + +Modern models already write tests and mock things — assertions have to reach past that. Target project-specific patterns, strict markers, exact naming regexes, coverage gates. The benchmark enforces a falsifiability gate before burning compute. + +**Good:** `Every test function is decorated with @pytest.mark.unit` — grep-verifiable, baseline often skips. +**Bad:** `The code is well-designed` — unverifiable. + +## Reading the report + +`/benchmark` shows results in **three layers**, designed to be absorbed in 30 seconds. Read top-to-bottom and stop when you have your answer. 
+ +### Layer 1 — Verdict + headline + +The first three lines tell you whether to ship, iterate, or worry. The verdict sentence has a fixed shape: emoji label + quantified claim + forward-pointing action hint. + +``` +VERDICT 🟢 Moderate signal — rule lifts pass rate from 0.33 to 0.78 (+0.44). + Ship after addressing 1 Unreachable assertion (eval-1, #3). + + Pass rate with 7/9 (0.78) without 3/9 (0.33) Δ +0.44 🟢 + Avg time with 27.5s without 25.9s Δ +1.6s + Avg tokens with 200k without 206k Δ −6k + Runs 3/3 ok 3/3 ok 0 failed +``` + +The label maps directly to a recommended next step: + +| Delta range | Label | What to do | +|---|---|---| +| `≥ +0.50` | 🟢 **Strong signal** | Rule clearly works. Ship; consider expanding coverage. | +| `+0.20 to +0.49` | 🟢 **Moderate signal** | Rule helps. Ship if the lift justifies the rule's maintenance cost. | +| `+0.05 to +0.19` | 🟡 **Weak signal** | Real but small. Tighten assertions or strengthen the rule's language/examples. | +| `−0.05 to +0.04` | ⚪ **Indistinguishable** | Either assertions don't discriminate, or the rule isn't landing — the quadrant breakdown below tells you which. | +| `< −0.05` | 🔴 **Regression** | Investigate before shipping. The rule actively misled the model. | + +Stddev > 0.30 in pass rate on any single eval = flaky/model-dependent — re-run with `--runs 3` before drawing conclusions. + +### Layer 2 — Quadrant breakdown + +Each assertion is classified by its `(with, without)` verdict pair. 
The counts tell you whether to fix the rule or the evals: + +``` +Quadrant breakdown (out of 9 assertions) + 🟢 Signal 5 rule works here + ⚪ Baseline 3 eval doesn't discriminate — tighten assertions + 🟡 Unreachable 1 rule isn't cutting through — sharpen rule + 🔴 Regression 0 +``` + +| Quadrant | `with` / `without` | Means | Fix | +|---|---|---|---| +| **Signal** | ✓ / ✗ | Rule works here | Keep — consider amplifying | +| **Baseline** | ✓ / ✓ | Eval doesn't discriminate | Tighten the assertion | +| **Unreachable** | ✗ / ✗ | Rule isn't cutting through | Sharpen the target | +| **Regression** | ✗ / ✓ | Rule misled the model | Fix the target — blocks shipping | + +The dominant quadrant drives the improvement plan: Signal-heavy → ship, Baseline-heavy → fix evals, Unreachable-heavy → fix rule, any Regression → investigate first. + +### Layer 3 — Per-eval drill-down (divergent only) + +Only assertions where `with ≠ without` get a row. Matching ones are folded into the per-eval header counts so the report stays scannable. Evidence is capped at 200 chars. 
+ +``` +Eval 1 — strict-tdd-naming-pbt 🟢×2 ⚪×0 🟡×1 🔴×0 + 🟢 #1 Every test decorated with @pytest.mark.unit + evidence (with): "All 5 tests use @pytest.mark.unit decorator" + 🟢 #2 Test names follow strict 4-segment regex + evidence (with): "9 test names match test_slugify_<scenario>_<expected>" + 🟡 #3 ≥1 property-based test using hypothesis + (both fail — rule doesn't teach hypothesis PBT) + +Eval 2 — mock-audit-new-dependency 🟢×3 ⚪×0 🟡×0 🔴×0 + 🟢 #1 BOTH pre-existing tests updated with subprocess mocks + evidence (with): "Both test_build_order_id_* updated with @patch" + 🟢 #2 Mock applied at module-of-consumption + evidence (with): "@patch('orders.subprocess.run') in all tests" + 🟢 #3 pytest run completes <1s wall time + evidence (with): "pytest finished in 0.04s, no real git invocation" +``` + +If the grader flagged any assertions as trivially-satisfied or noticed an uncovered behavior, those critiques appear in their own short section. + +## The improvement plan + +After the report, `/benchmark` proposes **specific edits** (≤ 5, ranked by expected delta) using a uniform format. Every proposal names a location, quotes the current text, shows the replacement, and labels the lever it pulls. + +``` +1. [TARGET] pilot/rules/testing.md L42–L44 + Quadrant: Unreachable (eval-1 #3 — hypothesis PBT) + Current: "Property-based testing is encouraged for complex inputs." + Propose: "⛔ Property-based test required for parsers and serializers — use `@hypothesis.given`." + Lever: Soft language → mandatory; adds the exact tool name. + +2. [EVALS] eval-2 assertion #3 + Quadrant: Baseline (grader: "would pass for partially-mocked test") + Current: "pytest run completes in <1s wall time" + Propose: "subprocess.run mock asserts called_once_with(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])" + Lever: Loose timing check → exact call signature. +``` + +Then `/benchmark` asks which path to take: + +1. Apply target edits and re-run +2. 
Apply eval edits and re-run (with the falsifiability gate first) +3. Both (the lift becomes harder to attribute) +4. Save the plan and stop + +`/benchmark` never applies edits silently. Re-runs land in a fresh `runs/<ts>/` so iteration-over-iteration deltas stay legible. + +## Gotcha: global contamination + +A globally-installed copy of your target in `~/.claude/` would leak into the `without` config. The runner moves it aside for the run and restores it automatically. + +Pass `--no-isolate-global` to measure "target + your day-to-day setup" instead. + +## Gotcha: conditional-loading frontmatter + +Pilot rules (and skills) can scope themselves to specific files via YAML frontmatter — e.g. `paths: ["**/*.py"]` for Python-only rules. If the benchmark prompts don't fall inside that glob, the rule stays dormant in the `with` config too and the delta collapses to 0.00 — you'd be measuring activation, not the rule's content. + +The runner strips `path:` and `paths:` from the **copy** installed into the `with` sandbox so the target loads unconditionally for every prompt during the run. The original source file is never touched. A one-line announcement at startup names which files and which fields were stripped, so it never happens silently. diff --git a/docs/docusaurus/docs/workflows/create-skill.md b/docs/docusaurus/docs/workflows/create-skill.md index b61152e9..d9e07b6c 100644 --- a/docs/docusaurus/docs/workflows/create-skill.md +++ b/docs/docusaurus/docs/workflows/create-skill.md @@ -38,9 +38,9 @@ $ pilot | **Workflow Automation** | Multi-step processes with consistent methodology | Step-by-step gates, validation, iterative refinement | | **MCP Enhancement** | Workflow guidance on top of MCP tool access | Multi-MCP coordination, domain expertise, error handling | -## Skill Complexity Spectrum +## How big should a skill be -Skills are designed with the simplest possible structure. Left = more reliable and cheaper to execute. 
+Skills are designed with the simplest possible structure that does the job. Simpler = more reliable and cheaper to execute. | Level | Style | Best For | |-------|-------|----------| diff --git a/docs/docusaurus/docs/workflows/fix.md b/docs/docusaurus/docs/workflows/fix.md new file mode 100644 index 00000000..43ff813e --- /dev/null +++ b/docs/docusaurus/docs/workflows/fix.md @@ -0,0 +1,119 @@ +--- +sidebar_position: 5 +title: /fix +description: Bugfix workflow — investigate, RED test, fix, audit, done. +--- + +# /fix + +Bugfix workflow with RED-before-GREEN discipline. Investigates the bug, writes a failing test, fixes at the root cause, audits, finishes. No plan file, no approval mid-flow, no separate verify phase. + +Use `/fix` for bugs. Use [`/spec`](/docs/workflows/spec) for features and architectural changes — including bugfixes that warrant a full plan with approval and code review. + +```bash +$ pilot +> /fix "annotation persistence drops fields between save and reload" +> /fix "off-by-one in pagination at boundary" +> /fix "wrong default for max_retries" +``` + +`/fix` is **always quick**. If investigation reveals the bug is multi-component, architectural, or otherwise larger than a quick fix, `/fix` stops cleanly and tells you to re-invoke with `/spec`. It does not silently switch lanes — `/fix` means quick, `/spec` means the full workflow. + +## Workflow + +``` +Investigate → RED → Fix → Audit → Quality Gate → Done +``` + +### Investigate + +Trace the bug to `file:lineN — function() does X but should do Y` with **High** or **Medium** confidence. + +- Reproduce the bug. Restate symptom, trigger, expected behaviour. +- Skim recent changes (`git log --oneline -10`). +- Start with `codegraph_context(task=…)` for orientation. For local bugs, one or two targeted reads is enough — no full call-graph traversal. 
+- For UI / async / race / timing bugs that don't surface from a static read: add temporary `SPEC-DEBUG:`-marked logs at component boundaries, trigger the bug, read the output, then proceed. The Audit step greps for this marker — leftover diagnostics fail the audit. +- State the root cause out loud before writing any test. If confidence stays Low: bail out. + +### RED — Write the Reproducing Test + +Encode `Currently → Expected` via an existing public entry point. Run it; it must **fail** with an error matching the symptom. + +A test that passes against buggy code doesn't encode the bug — re-investigate. A test that errors for unrelated reasons (import error, missing fixture) is not a valid signal. + +### Fix at the Root Cause + +Make the **minimal** change at the root cause. One change, one variable, one logical fix. No "while I'm here" cleanups. No bundled refactoring. + +Forbidden: broad new `try/except`, `if value is None: return default` at the caller when the bug is upstream, swallowed exceptions, silently normalised bad inputs. + +Re-run the reproducing test → must **pass**. Then run the test module(s) covering the fix file (fast, scoped). The full anti-regression suite runs once at the Quality Gate, not after every fix iteration. + +### Audit + +Single pass — replaces the eight-substep audit of the full lane: + +- **Scope sanity** — root-cause file IS in the diff, no unplanned files appear, diff is small. +- **Symptom-patching grep** — `git diff | grep` for new `try/except`, swallowed returns, leftover `print`/`console.log` and `SPEC-DEBUG:` markers. Justify each match or revert. +- **End-to-end verification — MANDATORY** — re-run the user's actual repro and capture concrete evidence. **A passing unit test does NOT prove the bug is fixed.** Skipping is not an option, no exceptions. + - **UI bugs:** browser automation against the running app. 4-tier resolution: **Claude Code Chrome** → **Chrome DevTools MCP** → **playwright-cli** → **agent-browser**. 
Walk the user's repro steps, read the page, confirm correct behaviour. + - **CLI:** run the exact command the user ran, capture output + exit code. + - **API:** `curl` / HTTP client, capture status + the field that proves the fix. + - **Library / SDK:** `python -c '…'`, `node -e '…'`, or scratch script with the user's args, capture the returned value. + - **Background job:** trigger manually, read logs. + +Bare assertions ("looks fixed", "behaves correctly") are insufficient — the finalise step requires evidence in the report. If the symptom persists, the unit test is at the wrong layer: move the assertion up to the user's actual entry point and re-run RED → fix → audit. + +### Quality Gate + +Lint + types + build (when applicable), then the full test suite. If a far-from-the-fix test breaks, the bug has unintended cross-coupling — bail out. + +### Finalise + +- Worktree mode: bundle test + fix into one commit (`fix: <one-line>`). +- Approval gate fires only if **Plan Approval** is enabled in Console Settings. +- The completion report includes a mandatory **E2E** line documenting what was actually run and observed — not just "tests pass". Without it, the workflow is incomplete. +- Console notification + report. + +## When to bail out — use `/spec` instead + +`/fix` stops and tells you to re-invoke with `/spec` when: + +- Bug spans 3+ files or 2+ components. +- Root cause is architectural, not a single line. +- Fix needs defense-in-depth at multiple layers. +- Confidence stays Low — root cause can't be pinned to file:line. +- Two quick-lane fix attempts have already failed. +- Fix has non-trivial UI implications that warrant a recorded Verification Scenario. + +The full lane (`/spec`) adds: Behavior Contract, three-task structure, plan file with approval gate, Console annotation cycle, `cp`+`trap` revert-test proof in verify, iteration cap at 3. 
+ +## Common issues + +| Symptom | What it means | What to do | +| ------- | ------------- | ---------- | +| Can't reproduce | Description is too vague or environment-dependent | Ask for exact steps, env, stack trace. Do not write a speculative fix. | +| Test passes without the fix | Test doesn't encode the bug | Tighten the assertion or pick a more specific input. | +| Fix breaks far-away tests | Cross-coupling beyond the quick lane | Bail out. Re-invoke with `/spec`. | +| Reproducing test green but user still hits the bug | Test sits below the user's layer | Move the assertion to the user's actual entry point (API, browser, CLI). | +| Two failed fix attempts | Architectural problem, not a fix problem | Bail out. The pattern needs reconsidering, not another patch. | + +## Configurable Toggles + +`/fix` honours the same Console Settings as `/spec`: + +| Toggle | Default | Effect when disabled | +| ------ | ------- | -------------------- | +| **Ask Questions** | On | Investigation skips clarifying questions and uses defaults. | +| **Plan Approval** | On | The end-of-flow approval gate is skipped — fix is finalised immediately. | + +When both are off, `/fix` runs end-to-end with no user interaction. Worktree isolation is not honoured — use `/spec` if you want a worktree. 
+ +## When to use `/spec` vs `/fix` + +| Use `/fix` | Use `/spec` | +| ---------- | ----------- | +| Something is broken | Building new functionality | +| Bug fits in 1–2 files | Architecture decisions matter | +| Root cause is locatable to a line/function | Multiple sub-systems involved | +| Fix is small and contained | Work warrants a written plan + approval | diff --git a/docs/docusaurus/docs/workflows/prd.md b/docs/docusaurus/docs/workflows/prd.md index 05848428..81090093 100644 --- a/docs/docusaurus/docs/workflows/prd.md +++ b/docs/docusaurus/docs/workflows/prd.md @@ -1,12 +1,12 @@ --- sidebar_position: 3 title: /prd -description: Generate Product Requirements Documents (PRDs) with optional research through strategic conversation before /spec +description: Brainstorm vague ideas into Product Requirements Documents through back-and-forth conversation, then hand off to /spec --- # /prd -Generate Product Requirements Documents (PRDs) through strategic conversation with optional research. Use `/prd` before `/spec` when requirements are unclear or you need to explore trade-offs before committing to a technical plan. +`/prd` is the **brainstorming surface** for ideas that aren't yet specs. Use it when you have a vague idea, a problem statement without a solution, or just want to think out loud and have Claude pressure-test directions before committing to a plan. The conversation produces a Product Requirements Document (PRD) you can hand directly to `/spec`. ```bash $ pilot @@ -17,25 +17,39 @@ $ pilot ## When to Use +`/prd` and `/spec` chain together: `/prd` defines **what** and **why** when requirements are unclear, `/spec` plans and implements **how** once you know what you're building. 
+ | Situation | Command | |-----------|---------| | Idea is vague, requirements unclear | `/prd` first, then `/spec` | +| Only have a problem statement, not a solution | `/prd` | +| Want to brainstorm back-and-forth before deciding | `/prd` | +| Multiple obviously-different shapes could satisfy the request | `/prd` | | Need to explore trade-offs and alternatives | `/prd` | | Want research on competitors or prior art | `/prd` with Standard or Deep research | | Requirements are well-defined | `/spec` directly | | Small task, no planning needed | Quick mode (just chat) | +## Two Modes Inside One Flow + +`/prd` has two distinct conversational modes — divergent for generating ideas, convergent for locking them down: + +- **Divergent (Ideate):** Free-form prose. Claude pitches 3-5 distinct directions, you react ("yes that one, but…"), Claude pressure-tests viability and pitches the next round. No structured forms — this is where the riffing happens. +- **Convergent (Clarify → Converge → Write):** Structured `AskUserQuestion` forms with predefined options. Used once the shape is known and you're nailing down details. + +The skill picks the mode automatically based on how concrete your input is. A vague problem statement triggers Ideate; a concrete request like "Add Google OAuth" skips it. + ## Workflow ``` -Understand → Research (optional) → Clarify → Propose → Converge → Write PRD → Hand off to /spec +Understand → Research (optional) → Ideate (if vague) → Clarify → Propose → Converge → Write PRD → Hand off to /spec ``` The entire flow is conversational — one question at a time, no rushing to solutions. ### 1. Understand the Idea -Restates your idea, explores project context, and identifies the core problem. Doesn't jump to solutions. 
+Restates your idea, explores project context with CodeGraph, identifies the core problem, and **scope-checks** — if the request describes multiple independent subsystems (e.g., "build a platform with chat, billing, and analytics"), helps you decompose into multiple PRDs before continuing. Doesn't jump to solutions. ### 2. Research (Optional) @@ -49,9 +63,25 @@ Choose a research tier at the start: Research findings are embedded in the PRD under a dedicated section. +### 2b. Ideate (Optional — Divergent Brainstorming) + +When the idea is vague, this step kicks in **before** structured questions. Claude pitches 3-5 distinct directions in plain prose: + +> A few directions for "better onboarding": +> - **Reduce surface area** — cut the signup form to email-only, defer the rest +> - **Guided first-run** — keep signup, add a 3-step tour after first login +> - **Pre-fill from context** — infer company/role from email domain +> - **Async setup** — let users start using the product, complete profile later +> +> Which resonate, or where am I off? + +You react in your own words. Claude pressure-tests your reaction (where does it break? what does it cost?), then pitches the next round shaped by your answer. Usually 1-3 rounds — the signal to converge is when you start saying "yes, and…" instead of "no, but…". + +This step is **skipped automatically** when your input is concrete (e.g., "Add Google OAuth" — Claude won't pitch alternatives you didn't ask for). + ### 3. Ask Clarifying Questions -One question at a time. Focuses on purpose, users, constraints, success criteria, and scope boundaries. Challenges assumptions and surfaces trade-offs. Typically 3-6 questions. +Switches to structured `AskUserQuestion` forms. One question at a time, with 2-4 predefined options each. Focuses on purpose, users, constraints, success criteria, and scope boundaries. Challenges assumptions and surfaces trade-offs. Typically 3-6 questions. ### 4. 
Propose Approaches @@ -74,6 +104,8 @@ Saves a PRD to `docs/prd/YYYY-MM-DD-<slug>.md` with structured metadata and thes | **Key Decisions** | Trade-offs made during the conversation with reasoning | | **Research Findings** | Embedded research results (when research tier was Standard or Deep) | +After writing, Claude runs a 4-point self-review (placeholders, consistency, scope, ambiguity), then **asks you to open the file in your editor and read it through** before you confirm. If you request changes, Claude edits the specific sections in place — it doesn't rewrite the whole document, so you don't lose your editor scroll position or have to re-read everything. + ### 7. Hand Off to /spec After you confirm the PRD, asks whether to hand off to `/spec` immediately or save for later. If yes, `/spec` is invoked automatically with a reference to the PRD. @@ -90,9 +122,10 @@ PRDs are visible in the **Pilot Console** under the **Requirements** tab, where | Aspect | /prd | /spec | |--------|------|-------| -| **Purpose** | Explore and define requirements | Plan and implement | +| **Purpose** | Brainstorm and define requirements | Plan and implement | | **Output** | PRD (what and why) | Implementation plan + code (how) | -| **Style** | Conversational, strategic | Structured, technical | +| **Style** | Conversational, divergent then convergent | Structured, technical | +| **Best fit** | Vague ideas, problem statements, "I'm thinking…" | Concrete requirements, "I need to build X" | | **Research** | Optional (Quick/Standard/Deep) | No research phase | | **Questions** | One at a time, exploratory | Batched, focused on design | | **When** | Idea stage, unclear requirements | Ready to build | diff --git a/docs/docusaurus/docs/workflows/setup-rules.md b/docs/docusaurus/docs/workflows/setup-rules.md index 9477a77b..64369a94 100644 --- a/docs/docusaurus/docs/workflows/setup-rules.md +++ b/docs/docusaurus/docs/workflows/setup-rules.md @@ -17,21 +17,22 @@ $ pilot ## What /setup-rules Does 
-11 phases: +12 phases: | Phase | Action | |-------|--------| | 0 | Load reference guidelines, recommended directory structure, error handling | -| 1 | Read existing rules (including nested subdirectories), detect structure and path-scoping | +| 1 | Read existing rules (including nested subdirectories), detect structure and path-scoping. Also detects `CLAUDE.md` and `AGENTS.md` | | 2 | Migrate unscoped assets to `{slug}`-prefixed names | | 3 | Quality audit against best practices (size, specificity, path-scoping enforcement) | | 4 | Explore codebase with Probe CLI, CodeGraph, and Grep to find patterns | | 5 | Compare discovered vs documented patterns | -| 6 | Sync project rule, nested directories, and generate rules README | +| 6 | Sync project rule, nested directories, and generate rules README. Migrates `CLAUDE.md` / `AGENTS.md` content into modular rules | | 7 | Sync MCP server documentation | | 8 | Discover new rules, place in correct directory by scope | | 9 | Cross-check: validate references, README, path-scoping | -| 10 | Report summary of all changes made | +| 10 | Sync rules back to `AGENTS.md` if it exists (always asks first; never created if absent). User-authored sections preserved | +| 11 | Report summary of all changes made | ## When to Run /setup-rules diff --git a/docs/docusaurus/docs/workflows/spec.md b/docs/docusaurus/docs/workflows/spec.md index 377dd206..3bf9b7db 100644 --- a/docs/docusaurus/docs/workflows/spec.md +++ b/docs/docusaurus/docs/workflows/spec.md @@ -8,15 +8,14 @@ description: Plan, implement, and verify complex features with full automation u Plan, implement, and verify complex features with full automation using Spec-Driven Development. -**Replaces Claude Code's built-in plan mode (Shift+Tab).** Best for complex features, refactoring tasks, or any work where you want to review a plan before implementation begins. The structured workflow prevents scope creep and ensures every task is tested and verified before being marked complete. 
+**Replaces Claude Code's built-in plan mode (Shift+Tab).** Best for new features, refactoring, architectural changes — work where a plan and a design discussion add value before code. The structured workflow prevents scope creep and ensures every task is tested and verified before being marked complete. -> **Tip:** For unclear requirements, use [`/prd`](/docs/workflows/prd) first to research and produce a PRD, then hand off to `/spec`. +For bugfixes, use [`/fix`](/docs/workflows/fix). For vague ideas, use [`/prd`](/docs/workflows/prd) first to produce a PRD, then hand off to `/spec`. ```bash $ pilot > /spec "Add user authentication with OAuth and JWT tokens" > /spec "Migrate the REST API to GraphQL" -> /spec "Fix the crash when deleting nodes with two children" # bugfix auto-detected ``` ## Workflow @@ -38,14 +37,9 @@ Full exploration workflow for new functionality, refactoring, or any work where - Full plan with scope, risks, and Definition of Done - Unified verification agent (optional, configurable in Console Settings) -### Bugfix Spec (auto-detected) +### Bugfixes -Investigation-first flow for targeted fixes. Finds the root cause before touching any code. - -- Root cause tracing: backward through call chain to `file:line` -- Pattern analysis: compare broken vs working code paths -- Test-before-fix: regression test FAILS → fix → all tests PASS -- Lightweight verify: regression test + full suite, no sub-agents +For a bugfix workflow without a plan file, use [`/fix`](/docs/workflows/fix). When the user types `/spec` with a bug description, the full bugfix workflow runs — root-cause investigation, three-task structure (RED test → fix → quality gate), Behavior Contract audit, revert-test proof in verify, iteration cap at 3. ## Three Phases @@ -59,19 +53,19 @@ Investigation-first flow for targeted fixes. 
Finds the root cause before touchin ### Implement Phase -- Isolated git worktree on a dedicated branch (optional) +- Isolated git worktree, new branch from default, or current branch (your choice) - Strict TDD for each task: RED → GREEN → REFACTOR - Quality hooks auto-lint, format, and type-check every edit -- Full test suite after each task to catch regressions early +- Full test suite runs at the **Quality Gate** task (end), not after every task — running it per-fix-task is the single biggest token sink in bundled bugfix plans, so the targeted test module is used between fixes and the authoritative full-suite run happens once ### Verify Phase - Full test suite + type checking + lint + build verification - Features: unified review sub-agent (optional, enabled by default) - Bugfixes: regression test + full suite — no sub-agents needed -- For UI features: executes the plan's **E2E test scenarios** step-by-step via browser automation (Claude Code Chrome → playwright-cli → agent-browser) — tracks pass/fail per scenario, auto-fixes failures (up to 2 attempts), escalates persistent failures to known issues; results written back to the plan file +- For UI features: executes the plan's **E2E test scenarios** step-by-step via browser automation (Claude Code Chrome → Chrome DevTools MCP → playwright-cli → agent-browser) — tracks pass/fail per scenario, auto-fixes failures (up to 2 attempts), escalates persistent failures to known issues; results written back to the plan file - Auto-fixes findings, loops back until all checks pass -- After automated checks pass, prompts you to **review code changes** in the Console's Changes tab — enable Review mode, add inline annotations on any diff line (they save automatically), and the agent addresses them before marking the spec as verified +- After automated checks pass, prompts you to **review code changes** in the Console's Changes tab — each file shows a **T{N}** badge linking it to the spec task that changed it, and you can click 
**Spec** to group files by task for focused review. Enable Review mode to add inline annotations on any diff line (they save automatically), and the agent addresses them before marking the spec as verified ## Configurable Toggles @@ -81,7 +75,7 @@ All interaction points in `/spec` are configurable via **Console Settings → Sp | Toggle | Default | Effect when disabled | | -------------------- | ------- | ------------------------------------------------------------------------- | -| **Worktree Support** | On | Worktree is never used — implementation always runs on the current branch | +| **Worktree Support** | On | Worktree and new-branch options are hidden — implementation always runs on the current branch | | **Ask Questions** | On | Planning runs fully autonomous — no clarifying questions | | **Plan Approval** | On | Implementation starts immediately after planning — no approval gate | @@ -96,6 +90,18 @@ When all three are disabled, `/spec` runs end-to-end without any user interactio Both reviewers run in a separate context window and don't consume the main session's context budget. Optional **Codex adversarial reviewers** (off by default) provide an independent second opinion using OpenAI Codex. -## Worktree Isolation (Optional) +**Codex runs at most once per `/spec` invocation.** Plan iterations (annotation feedback, verify re-runs, fixing prior findings) reuse the result of the first Codex review instead of re-launching — a sentinel file in the session directory enforces this. The bugfix planning phase no longer runs Codex at all; adversarial review is most valuable on real code, not on a plan. + +## Branch Strategy (Optional) + +When starting a `/spec` task, you're asked how you want to work: + +| Option | What happens | +| ------ | ------------ | +| **Use worktree** | Creates an isolated git worktree on a dedicated branch. `main` stays clean. Pilot auto-stashes uncommitted changes, restores them after. 
Squash-merged after verification — or discard with no cleanup. | +| **Current branch** | Works directly on whatever branch you're on. Simplest option when you're already on a clean feature branch. | +| **New branch from default** | Fetches origin, creates `feat/<slug>` (or `fix/<slug>` for bugfixes) from `origin/main`, and checks it out. Best when your current branch isn't clean but you don't want full worktree isolation. | + +Disable the **Worktree Support** toggle in Console Settings to skip this question entirely — `/spec` will always use the current branch. -When starting a `/spec` task, you can choose to work in an isolated git worktree. All implementation happens on a dedicated branch — `main` stays clean throughout. Pilot auto-stashes any uncommitted changes before creating the worktree and restores them after. After verification passes, choose to squash merge back. If the experiment doesn't work out, discard the worktree with no cleanup required. +For bugfixes, use [`/fix`](/docs/workflows/fix) — the worktree question is asked here in `/spec` because that's where it applies. diff --git a/docs/docusaurus/docusaurus.config.ts b/docs/docusaurus/docusaurus.config.ts index 804be718..fa7b908b 100644 --- a/docs/docusaurus/docusaurus.config.ts +++ b/docs/docusaurus/docusaurus.config.ts @@ -4,11 +4,15 @@ import type * as Preset from "@docusaurus/preset-classic"; const config: Config = { title: "Pilot Shell", - tagline: "The professional development environment for Claude Code", + tagline: "The Claude Code engineering platform — spec-driven plans, enforced tests, persistent knowledge", favicon: "img/favicon.png", url: "https://pilot-shell.com", baseUrl: "/", + // Match Vercel's `trailingSlash: false` so canonicals point to the actually-served URL. + // Without this, Docusaurus emits canonical=/docs/X/ but Vercel 308-redirects to /docs/X. + // Google sees the conflict and declines to index the entry point. 
+ trailingSlash: false, organizationName: "maxritter", projectName: "pilot-shell", @@ -48,6 +52,14 @@ const config: Config = { ], themeConfig: { + image: "https://pilot-shell.com/logo.png", + metadata: [ + { name: "keywords", content: "Claude Code engineering platform, Claude Code, Claude Code platform, Claude Code framework, spec-driven development, Pilot Shell, TDD enforcement, AI coding agent, MCP servers, Claude Code best practices" }, + { name: "twitter:card", content: "summary_large_image" }, + { name: "twitter:site", content: "@maxritter" }, + { property: "og:type", content: "website" }, + { property: "og:site_name", content: "Pilot Shell" }, + ], colorMode: { defaultMode: "dark", disableSwitch: false, @@ -86,8 +98,8 @@ const config: Config = { title: "Docs", items: [ { label: "Getting Started", to: "/docs/getting-started/prerequisites" }, - { label: "Workflows", to: "/docs/workflows/sync" }, - { label: "Features", to: "/docs/features/share" }, + { label: "Spec Workflow", to: "/docs/workflows/spec" }, + { label: "Hooks Pipeline", to: "/docs/features/hooks" }, ], }, { diff --git a/docs/docusaurus/sidebars.ts b/docs/docusaurus/sidebars.ts index 909e7ad2..5429aedf 100644 --- a/docs/docusaurus/sidebars.ts +++ b/docs/docusaurus/sidebars.ts @@ -22,6 +22,8 @@ const sidebars: SidebarsConfig = { "workflows/create-skill", "workflows/prd", "workflows/spec", + "workflows/fix", + "workflows/benchmark", "workflows/quick-mode", ], }, @@ -31,6 +33,7 @@ const sidebars: SidebarsConfig = { collapsed: false, items: [ "features/console", + "features/bot", "features/statusline", "features/model-routing", "features/rules", @@ -38,6 +41,7 @@ const sidebars: SidebarsConfig = { "features/remote-control", "features/hooks", "features/extensions", + "features/customization", "features/cli", "features/mcp-servers", "features/language-servers", diff --git a/docs/img/dashboard.png b/docs/img/dashboard.png index 22e7b611..d2ad9d68 100644 Binary files a/docs/img/dashboard.png and 
b/docs/img/dashboard.png differ diff --git a/docs/img/demo.gif b/docs/img/demo.gif index e1711dc9..339bd300 100644 Binary files a/docs/img/demo.gif and b/docs/img/demo.gif differ diff --git a/docs/img/extensions.png b/docs/img/extensions.png index 8d254204..8770c7ce 100644 Binary files a/docs/img/extensions.png and b/docs/img/extensions.png differ diff --git a/docs/img/specifications.png b/docs/img/specifications.png index 93140b79..25bcbe4e 100644 Binary files a/docs/img/specifications.png and b/docs/img/specifications.png differ diff --git a/docs/img/statusline.png b/docs/img/statusline.png new file mode 100644 index 00000000..8683cc18 Binary files /dev/null and b/docs/img/statusline.png differ diff --git a/docs/site/build-all.sh b/docs/site/build-all.sh index 12f3a291..f2d37607 100755 --- a/docs/site/build-all.sh +++ b/docs/site/build-all.sh @@ -24,19 +24,37 @@ BUILD="$DOCUSAURUS_DIR/build" # Copy Docusaurus docs into Vite dist cp -r "$BUILD/docs" "$DIST/docs" +# With Docusaurus `trailingSlash: false`, the docs entry page builds as `docs.html` +# at the build root (not `docs/index.html`). Copy it so `/docs` resolves via Vercel's +# cleanUrls — without this, /docs would fall through to the SPA rewrite and serve +# the marketing page instead of the docs intro. +[ -f "$BUILD/docs.html" ] && cp "$BUILD/docs.html" "$DIST/docs.html" + # Merge Docusaurus assets into Vite assets (no filename conflicts — Vite uses hashes, Docusaurus uses css/js subdirs) cp -r "$BUILD/assets/"* "$DIST/assets/" -# Copy search plugin files -[ -d "$BUILD/search" ] && cp -r "$BUILD/search" "$DIST/search" +# Copy search plugin files. With trailingSlash: false, search becomes search.html +# (a file at root) instead of search/index.html. 
+if [ -f "$BUILD/search.html" ]; then + cp "$BUILD/search.html" "$DIST/search.html" +elif [ -d "$BUILD/search" ]; then + cp -r "$BUILD/search" "$DIST/search" +fi [ -f "$BUILD/search-index.json" ] && cp "$BUILD/search-index.json" "$DIST/search-index.json" # Copy Docusaurus img (favicon etc.) — merge into existing img or create [ -d "$BUILD/img" ] && cp -r "$BUILD/img" "$DIST/img" -# Copy sitemaps +# Move Docusaurus's sitemap into /docs/ so it doesn't overwrite the Vite-generated +# sitemap index at /sitemap.xml (which references both pages and docs sitemaps). +if [ -f "$BUILD/sitemap.xml" ]; then + cp "$BUILD/sitemap.xml" "$DIST/docs/sitemap.xml" +fi +# Copy any other Docusaurus xml files (rss, atom, etc.) to /docs/ for f in "$BUILD"/*.xml; do - [ -f "$f" ] && cp "$f" "$DIST/" 2>/dev/null || true + if [ -f "$f" ] && [ "$(basename "$f")" != "sitemap.xml" ]; then + cp "$f" "$DIST/docs/" 2>/dev/null || true + fi done echo "=== Build complete ===" diff --git a/docs/site/index.html b/docs/site/index.html index 9c4ea18a..ba8209c7 100644 --- a/docs/site/index.html +++ b/docs/site/index.html @@ -5,27 +5,40 @@ <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <!-- Primary Meta Tags --> - <title>Pilot Shell - Claude Code is powerful. Pilot Shell makes it reliable. - + Pilot Shell — The Claude Code Engineering Platform + - + + + - + - + - + + + + + + + + + + + + - - + + + - - + + + - + + + + + + + + + @@ -132,6 +290,51 @@ })();
+ + + + diff --git a/docs/site/public/0bd196f90bbc9ec8113bd78de2507fb2.txt b/docs/site/public/0bd196f90bbc9ec8113bd78de2507fb2.txt new file mode 100644 index 00000000..84b4f7f5 --- /dev/null +++ b/docs/site/public/0bd196f90bbc9ec8113bd78de2507fb2.txt @@ -0,0 +1 @@ +0bd196f90bbc9ec8113bd78de2507fb2 diff --git a/docs/site/public/console/changes.png b/docs/site/public/console/changes.png index b687b832..cc3d4ee8 100644 Binary files a/docs/site/public/console/changes.png and b/docs/site/public/console/changes.png differ diff --git a/docs/site/public/console/dashboard.png b/docs/site/public/console/dashboard.png index 22e7b611..d2ad9d68 100644 Binary files a/docs/site/public/console/dashboard.png and b/docs/site/public/console/dashboard.png differ diff --git a/docs/site/public/console/extensions.png b/docs/site/public/console/extensions.png index 8d254204..8770c7ce 100644 Binary files a/docs/site/public/console/extensions.png and b/docs/site/public/console/extensions.png differ diff --git a/docs/site/public/console/help.png b/docs/site/public/console/help.png index 7bf4087c..841a143c 100644 Binary files a/docs/site/public/console/help.png and b/docs/site/public/console/help.png differ diff --git a/docs/site/public/console/memories.png b/docs/site/public/console/memories.png index 0e4a55f6..2d2528c2 100644 Binary files a/docs/site/public/console/memories.png and b/docs/site/public/console/memories.png differ diff --git a/docs/site/public/console/requirements.png b/docs/site/public/console/requirements.png index 88c34f4f..1de97d33 100644 Binary files a/docs/site/public/console/requirements.png and b/docs/site/public/console/requirements.png differ diff --git a/docs/site/public/console/sessions.png b/docs/site/public/console/sessions.png index 04857463..b5bcc2cb 100644 Binary files a/docs/site/public/console/sessions.png and b/docs/site/public/console/sessions.png differ diff --git a/docs/site/public/console/settings.png b/docs/site/public/console/settings.png index 
adffa6bc..142b6dd0 100644 Binary files a/docs/site/public/console/settings.png and b/docs/site/public/console/settings.png differ diff --git a/docs/site/public/console/specifications.png b/docs/site/public/console/specifications.png index 93140b79..25bcbe4e 100644 Binary files a/docs/site/public/console/specifications.png and b/docs/site/public/console/specifications.png differ diff --git a/docs/site/public/console/usage.png b/docs/site/public/console/usage.png index 3bfcaa66..51676c8d 100644 Binary files a/docs/site/public/console/usage.png and b/docs/site/public/console/usage.png differ diff --git a/docs/site/public/demo.gif b/docs/site/public/demo.gif index e1711dc9..339bd300 100644 Binary files a/docs/site/public/demo.gif and b/docs/site/public/demo.gif differ diff --git a/docs/site/public/manifest.json b/docs/site/public/manifest.json index 9da7b435..74dd5a45 100644 --- a/docs/site/public/manifest.json +++ b/docs/site/public/manifest.json @@ -1,7 +1,7 @@ { "name": "Pilot Shell", "short_name": "Pilot", - "description": "Claude Code is powerful. 
Pilot Shell makes it reliable.", + "description": "The Claude Code engineering platform.", "start_url": "/", "display": "standalone", "background_color": "#000000", diff --git a/docs/site/public/robots.txt b/docs/site/public/robots.txt index 40952a30..34689bcc 100644 --- a/docs/site/public/robots.txt +++ b/docs/site/public/robots.txt @@ -1,17 +1,28 @@ +User-agent: * +Allow: / +Disallow: /shared + +# Common bots get explicit allow lines for clarity User-agent: Googlebot Allow: / User-agent: Bingbot Allow: / +User-agent: DuckDuckBot +Allow: / + +User-agent: YandexBot +Allow: / + User-agent: Twitterbot Allow: / User-agent: facebookexternalhit Allow: / -User-agent: * +User-agent: LinkedInBot Allow: / -# Sitemap location +# Sitemap index (references landing pages and docs) Sitemap: https://pilot-shell.com/sitemap.xml diff --git a/docs/site/src/components/ConsoleSection.tsx b/docs/site/src/components/ConsoleSection.tsx index 9241eaf6..50eb7c6f 100644 --- a/docs/site/src/components/ConsoleSection.tsx +++ b/docs/site/src/components/ConsoleSection.tsx @@ -6,44 +6,44 @@ const consoleSlides = [ { label: "Dashboard", src: "/console/dashboard.png", - alt: "Console Dashboard — grouped stats, workspace status, and spec progress", - desc: "Grouped stats for memory, specifications, and extensions. Workspace cards for usage, git, specs, and worker.", + alt: "Console Dashboard — stats, recent specifications, sessions, requirements, memories", + desc: "Global command center with 8 clickable stat cards and 4 recent cards (Specifications, Requirements, Sessions, Memories) with quick navigation. Active specs as pills in the top bar, notification bell in top right.", }, { - label: "Changes", - src: "/console/changes.png", - alt: "Changes view — git diff, staged files, code review annotations", - desc: "Git changes, staged files, and diff viewer with branch and worktree context. 
Review mode adds inline annotations on any diff line — they save automatically and the agent reads them before marking a spec verified.", - }, - { - label: "Requirements", - src: "/console/requirements.png", - alt: "Requirements view — PRD generation, research tiers, and requirement tracking", - desc: "Generate and manage Product Requirements Documents. Tiered deep research, requirement tracking, and share with your team.", + label: "Sessions", + src: "/console/sessions.png", + alt: "Sessions view — browse, search, and resume past sessions", + desc: "Browse and search past sessions. Copy the Claude Code session ID and use /resume to jump back into any session instantly.", }, { - label: "Specifications", - src: "/console/specifications.png", - alt: "Specification view — plan annotation, task progress, and phase tracking", - desc: "All spec plans with task progress, phase tracking, and iteration history. Annotate mode lets you mark up plans visually — select any text and write a note. Annotations save automatically and the agent reads them at the next review checkpoint.", + label: "Memories", + src: "/console/memories.png", + alt: "Memories view — captured decisions and patterns with semantic search", + desc: "Decisions, discoveries, and patterns captured automatically. Each memory links to its session — click to navigate. Semantic search across all memories.", }, { label: "Extensions", src: "/console/extensions.png", alt: "Extensions view — local, plugin, and remote extensions with team sharing", - desc: "Browse, edit, and share extensions. Team sharing via git with push, pull, and diff. APM-compatible format for cross-tool sharing. Plugin extensions auto-discovered.", + desc: "Browse, edit, and share extensions across global, project, plugin, and remote scopes. 
Team sharing via git with push, pull, and diff.", }, { - label: "Memories", - src: "/console/memories.png", - alt: "Memories view — captured decisions and patterns with semantic search", - desc: "Decisions, discoveries, and patterns captured automatically. Semantic search across all memories.", + label: "Requirements", + src: "/console/requirements.png", + alt: "Requirements view — PRD brainstorming, research tiers, and requirement tracking", + desc: "Brainstorm vague ideas into Product Requirements Documents through back-and-forth conversation. Tiered deep research, requirement tracking, and team sharing.", }, { - label: "Sessions", - src: "/console/sessions.png", - alt: "Sessions view — active sessions with observation and prompt counts", - desc: "Active and past sessions with observation counts, duration, and expandable timelines.", + label: "Specifications", + src: "/console/specifications.png", + alt: "Specification view — plan annotation, task progress, and phase tracking", + desc: "All spec plans with task progress, phase tracking, and iteration history. Annotate mode lets you mark up plans visually — select any text and write a note.", + }, + { + label: "Changes", + src: "/console/changes.png", + alt: "Changes view — git diff, staged files, code review annotations", + desc: "Git changes, staged files, and diff viewer with branch and worktree context. Review mode adds inline annotations on diff lines.", }, { label: "Usage", @@ -55,7 +55,7 @@ const consoleSlides = [ label: "Settings", src: "/console/settings.png", alt: "Settings view — model selection per command, spec workflow toggles", - desc: "Choose models per command and sub-agent. Spec workflow toggles and reviewer configuration.", + desc: "Choose models per command and sub-agent. 
Spec workflow toggles, reviewer configuration, and pricing info.", }, { label: "Help", diff --git a/docs/site/src/components/DeepDiveSection.tsx b/docs/site/src/components/DeepDiveSection.tsx index f00f4506..a4c6f96f 100644 --- a/docs/site/src/components/DeepDiveSection.tsx +++ b/docs/site/src/components/DeepDiveSection.tsx @@ -14,6 +14,7 @@ import { Cpu, RefreshCw, Route, + Box, } from "lucide-react"; import { useInView } from "@/hooks/use-in-view"; @@ -22,21 +23,32 @@ const hooksPipeline = [ trigger: "SessionStart", description: "On startup, clear, or after compaction", hooks: [ - "Load persistent memory from Pilot Shell Console", - "Restore plan state after compaction (post_compact_restore.py)", - "Initialize session tracking (async)", + "Worker bootstrap restores session context", + "post_compact_restore.py restores plan state after compaction", + "Background session tracking starts asynchronously", ], color: "text-sky-400", bgColor: "bg-sky-400/10", borderColor: "border-sky-400/30", }, + { + trigger: "UserPromptSubmit", + description: "When the user sends a message", + hooks: [ + "spec_mode_guard.py blocks invalid /spec usage", + "Session registration starts in the background", + ], + color: "text-emerald-400", + bgColor: "bg-emerald-400/10", + borderColor: "border-emerald-400/30", + }, { trigger: "PreToolUse", - description: "Before search, web, or task tools", + description: "Before Bash, search, web, or agent tools", hooks: [ - "Block WebSearch/WebFetch — redirect to MCP alternatives", - "Block EnterPlanMode/ExitPlanMode — project uses /spec", - "Hint Probe CLI for semantic Grep patterns", + "tool_redirect.py reroutes unsupported tools to approved alternatives", + "Plan mode conflicts are blocked before they execute", + "tool_token_saver.py rewrites Bash commands through RTK", ], color: "text-amber-400", bgColor: "bg-amber-400/10", @@ -44,12 +56,11 @@ const hooksPipeline = [ }, { trigger: "PostToolUse", - description: "After every Write / Edit / MultiEdit", 
+ description: "After edits, reads, searches, and task tools", hooks: [ - "File checker: auto-format, lint, type-check (Python, TypeScript, Go)", - "TDD enforcer: warns if no failing test exists", - "Context monitor: tracks usage, warns before compaction", - "Memory observation: captures development context (async)", + "file_checker.py runs lint, type, and TDD checks on edits", + "context_monitor.py tracks usage before compaction", + "Memory observations are captured asynchronously", ], color: "text-primary", bgColor: "bg-primary/10", @@ -59,8 +70,8 @@ const hooksPipeline = [ trigger: "PreCompact", description: "Before auto-compaction fires", hooks: [ - "Capture active plan, task list, and key context to memory", - "Snapshot current progress so nothing is lost across cycles", + "pre_compact.py snapshots the active plan and task list", + "Current progress is persisted before context is compacted", ], color: "text-violet-400", bgColor: "bg-violet-400/10", @@ -70,8 +81,8 @@ const hooksPipeline = [ trigger: "Stop", description: "When Claude tries to finish", hooks: [ - "Spec stop guard: blocks if verification incomplete", - "Session summary: saves observations to memory (async)", + "spec_stop_guard.py blocks incomplete /spec verification", + "Session summaries save observations asynchronously", ], color: "text-rose-400", bgColor: "bg-rose-400/10", @@ -81,8 +92,8 @@ const hooksPipeline = [ trigger: "SessionEnd", description: "When the session closes", hooks: [ - "Stop worker daemon if no other sessions active", - "Send real-time dashboard notification (session ended)", + "session_end.py stops the worker if no sessions remain", + "A dashboard notification marks the session complete", ], color: "text-slate-400", bgColor: "bg-slate-400/10", @@ -104,18 +115,19 @@ const rulesCategories = [ icon: Brain, category: "Development Practices", rules: [ - "Project policies & debugging", - "Auto-compaction & context management", - "Persistent memory & reusable skills", + "Project 
policies, debugging, and git hygiene", + "Context management and compaction resilience", + "Code review reception and change handling", + "Documentation sync — keep README, API docs, and AGENTS.md current with code changes", ], }, { icon: Search, category: "Tools", rules: [ - "Context7 + grep-mcp + web search + GitHub CLI", - "Pilot CLI + Probe search", - "Playwright CLI (E2E browser testing)", + "Pilot CLI, Probe search, and RTK token optimization", + "Browser automation: Chrome, Chrome DevTools MCP, playwright-cli, agent-browser", + "MCP server selection plus context-mode routing", ], }, { @@ -150,8 +162,13 @@ const rulesCategories = [ const mcpServers = [ { icon: BookOpen, - name: "lib-docs", - desc: "Library documentation lookup — get API docs for any dependency", + name: "context7", + desc: "Library documentation lookup for frameworks and dependencies", + }, + { + icon: Box, + name: "codegraph", + desc: "Code knowledge graph for callers, impact analysis, and structural queries", }, { icon: Brain, @@ -216,7 +233,7 @@ const DeepDiveSection = () => { Hooks Pipeline

- 18 hooks across 7 lifecycle events — fire automatically at every + 15 hooks across 7 lifecycle events — fire automatically at every stage

@@ -270,10 +287,12 @@ const DeepDiveSection = () => {

These hooks keep your context lean.{" "} - RTK compresses tool - output by 60–90%. Rules load only for matching file types. - Skills use progressive disclosure — frontmatter always, - full content on demand. When compaction does fire,{" "} + context-mode routes + large outputs to a sandbox — up to 98% savings per command.{" "} + RTK compresses + remaining tool output by 60–90%. Rules load only for matching + file types. Skills use progressive disclosure. When compaction + fires,{" "} PreCompact captures state and SessionStart{" "} restores it — no progress lost. @@ -411,7 +430,7 @@ const DeepDiveSection = () => { MCP Servers

- External context, always available + Six preconfigured servers, lazy-loaded on demand

diff --git a/docs/site/src/components/FAQSection.tsx b/docs/site/src/components/FAQSection.tsx index 3461f2c0..e36ce83f 100644 --- a/docs/site/src/components/FAQSection.tsx +++ b/docs/site/src/components/FAQSection.tsx @@ -22,7 +22,7 @@ const faqItems = [ question: "Does Pilot Shell work with other AI coding tools?", answer: - "Pilot Shell is built for Claude Code. Every hook, rule, command, and workflow is engineered specifically for Claude\u2019s tool-use protocol, prompt format, and session lifecycle. Pilot Shell supports Claude Sonnet 4.6 and Claude Opus 4.6 \u2014 these are the models that produce the best results, and every rule and prompt is optimized for their behavior. Additionally, the optional Codex plugin adds OpenAI-powered adversarial review during /spec \u2014 an independent second opinion on your plans and code changes. Codex reviewers are disabled by default and can be enabled in Console Settings.", + "Pilot Shell is built for Claude Code. Every hook, rule, command, and workflow is engineered specifically for Claude\u2019s tool-use protocol, prompt format, and session lifecycle. Pilot Shell defaults to Claude Sonnet 4.6 and Claude Opus 4.7 (and any explicit Anthropic model ID such as claude-opus-4-6 via the Console’s Custom… option) \u2014 these are the models that produce the best results, and every rule and prompt is optimized for their behavior. Additionally, the optional Codex plugin adds OpenAI-powered adversarial review during /spec \u2014 an independent second opinion on your plans and code changes. Codex reviewers are disabled by default and can be enabled in Console Settings.", }, { question: "Does Pilot Shell work with existing projects?", @@ -49,6 +49,11 @@ const faqItems = [ answer: "Yes. Create your own in your project\u2019s .claude/ folder \u2014 rules, commands, skills, and agents are all plain markdown files. Your project-level assets load alongside Pilot Shell\u2019s built-in defaults and take precedence when they overlap. 
/setup-rules auto-discovers your codebase patterns and generates project-specific rules. /create-skill builds reusable skills from any topic interactively. View and manage all extensions on the Console Extensions page. Team plan users can also share extensions via a connected git repository \u2014 push, pull, and compare versions with your team.", }, + { + question: "Can I customize Pilot\u2019s built-in workflows and defaults?", + answer: + "Yes \u2014 the Customization feature on Team and Enterprise plans lets you modify what Pilot Shell auto-installs, not just add alongside it. Tweak the built-in /spec workflow (insert a security-review step, replace the planning template, disable a step you don\u2019t need), adjust existing rules, register additional hooks, add review agents, change which MCP or LSP servers get configured, and override the auto-applied settings.json and claude.json. Source is either a git repo for your team or a local directory for personal use \u2014 no git needed for a one-off tweak. On Team, every developer runs `pilot customize install ` once and stays in sync via `pilot customize update`. Skill overlays stay pinned to Pilot\u2019s upstream by hash, so when Pilot ships an improvement to a step you replaced, `pilot customize status` flags the drift and `pilot customize diff` shows what changed. Enterprise adds full source-code access to Pilot itself (launcher, console, all components) on top of everything Team gets \u2014 fork, audit, and modify the entire stack.", + }, { question: "Does Pilot Shell send my code or data to external services?", answer: diff --git a/docs/site/src/components/Footer.tsx b/docs/site/src/components/Footer.tsx index 74231a2b..31a87892 100644 --- a/docs/site/src/components/Footer.tsx +++ b/docs/site/src/components/Footer.tsx @@ -21,9 +21,7 @@ const Footer = () => {

- Claude Code is powerful. -
- Pilot Shell makes it reliable. + The Claude Code engineering platform.

@@ -56,7 +54,7 @@ const Footer = () => {
  • Docs diff --git a/docs/site/src/components/HeroSection.tsx b/docs/site/src/components/HeroSection.tsx index 3df8c550..77bd8e5e 100644 --- a/docs/site/src/components/HeroSection.tsx +++ b/docs/site/src/components/HeroSection.tsx @@ -15,15 +15,14 @@ const HeroSection = () => { {/* Subtitle */}
    -

    - The professional development environment{" "} - for Claude Code. -

    +

    + Pilot Shell — the Claude Code engineering platform. +

    - From requirement to production-grade code. Planned, tested, verified. + From requirement to production-grade code — planned, tested, verified.

    - Tests enforced. Context optimized. Quality automated. + Spec-driven plans. Enforced quality gates. Persistent knowledge.

    @@ -58,19 +57,19 @@ const HeroSection = () => {
    - Hooks + Overlays
    - Quality Gates + Modify Defaults
    - Sharing + Hooks
    - Skills & Teams + Quality Gates
    @@ -84,13 +83,13 @@ const HeroSection = () => { MCP Servers - Remote Control + LSP Servers Semantic Search - Reusable Skills + Pilot Bot
    @@ -116,7 +115,7 @@ const HeroSection = () => { asChild className="w-full sm:w-auto text-sm xs:text-base" > -
    + Read Documentation diff --git a/docs/site/src/components/InstallSection.tsx b/docs/site/src/components/InstallSection.tsx index 214eb2a1..300695b2 100644 --- a/docs/site/src/components/InstallSection.tsx +++ b/docs/site/src/components/InstallSection.tsx @@ -90,7 +90,7 @@ const InstallSection = () => { /prd {" "} - requirements{" → "} + brainstorm{" → "} /spec {" "} diff --git a/docs/site/src/components/Logo.tsx b/docs/site/src/components/Logo.tsx index 89658276..bc6c7c76 100644 --- a/docs/site/src/components/Logo.tsx +++ b/docs/site/src/components/Logo.tsx @@ -19,7 +19,7 @@ const Logo = ({ variant = "hero" }: LogoProps) => { Pilot Shell - Claude Code is powerful. Pilot Shell makes it reliable. { size="sm" className="hidden sm:inline-flex" > - Docs + Docs { ))} Docs diff --git a/docs/site/src/components/PricingSection.tsx b/docs/site/src/components/PricingSection.tsx index 474ef04e..c6371cd5 100644 --- a/docs/site/src/components/PricingSection.tsx +++ b/docs/site/src/components/PricingSection.tsx @@ -96,7 +96,7 @@ const PricingSection = () => {
  • - Continuous updates with new features and improvements + Pilot Bot — 24/7 automation agent with scheduled jobs
  • @@ -164,7 +164,13 @@ const PricingSection = () => {
  • - Extension sharing — share skills, rules, commands, and agents via git, with optional APM format + Extension sharing — share skills, rules, commands, and agents via git + +
  • +
  • + + + Customization — modify Pilot's built-in skills, rules, and auto-applied settings
  • diff --git a/docs/site/src/components/SEO.tsx b/docs/site/src/components/SEO.tsx index 6e5d18f1..0923a418 100644 --- a/docs/site/src/components/SEO.tsx +++ b/docs/site/src/components/SEO.tsx @@ -11,10 +11,10 @@ interface SEOProps { } const SEO = ({ - title = "Pilot Shell - Claude Code is powerful. Pilot Shell makes it reliable.", - description = "From requirement to production-grade code. Planned, tested, verified. Tests enforced, Context optimized, quality automated.", - keywords = "Pilot Shell, Claude Code, AI coding assistant, AI pair programming, TDD enforcement, Test-Driven Development, code quality automation, linting, formatting, type checking, spec-driven development, VS Code, Cursor, Windsurf, Claude API, Anthropic, AI development tools, automated testing, code review, persistent memory, semantic code search", - canonicalUrl = "https://pilot-shell.com", + title = "Pilot Shell — The Claude Code Engineering Platform", + description = "The Claude Code engineering platform: spec-driven planning, enforced TDD, persistent memory, and quality hooks for Python, TypeScript, and Go. 
Make Claude Code production-ready.", + keywords = "Claude Code engineering platform, Claude Code, Claude Code platform, Claude Code framework, spec-driven development, Pilot Shell, Anthropic Claude, AI pair programming, TDD enforcement, AI coding agent, Claude Sonnet 4.6, Claude Opus 4.7, MCP servers, Claude Code productivity, AI development environment, Claude Code best practices", + canonicalUrl = "https://pilot-shell.com/", ogImage = "https://pilot-shell.com/logo.png", type = "website", structuredData diff --git a/docs/site/src/components/TestimonialsSection.tsx b/docs/site/src/components/TestimonialsSection.tsx index 7de96ca5..2b163cf7 100644 --- a/docs/site/src/components/TestimonialsSection.tsx +++ b/docs/site/src/components/TestimonialsSection.tsx @@ -4,18 +4,18 @@ import { useInView } from "@/hooks/use-in-view"; const testimonials = [ { quote: - "Pilot turned Claude Code from a fast prototype tool into something I trust for production code. The TDD enforcement alone saved me from shipping broken features twice in the first week.", - role: "Senior Developer", + "Spec-driven development in Pilot Shell is incredible. I'm so impressed that I have to resist the urge to fix every issue all at once.", + role: "Pilot Shell User", }, { quote: - "The persistent memory is what sold me. I can pick up a project after a week and Claude already knows my architecture decisions, the bugs we fixed, and why we chose certain patterns. No more re-explaining everything.", - role: "Full-Stack Engineer", + "Instead of just letting Claude Code run on its own, you've managed to make it work in a much more organized, consistent, and reliable way within a workflow, which I think is fantastic. What you've built is truly impressive.", + role: "Pilot Shell User", }, { quote: - "The /spec workflow forces me to think before I code. 
The plan verification catches gaps I would have missed, and the automated code review is better than most human reviews I've gotten.", - role: "Tech Lead", + "I have fallen in love with Pilot and just can't stand the idea of having to go back to native Claude.", + role: "Pilot Shell User", }, ]; diff --git a/docs/site/src/components/WhatsInside.tsx b/docs/site/src/components/WhatsInside.tsx index d3695e1c..5bfb5919 100644 --- a/docs/site/src/components/WhatsInside.tsx +++ b/docs/site/src/components/WhatsInside.tsx @@ -5,8 +5,9 @@ import { Sparkles, Search, Terminal, - Eye, DollarSign, + SlidersHorizontal, + ArrowRight, } from "lucide-react"; import { useInView } from "@/hooks/use-in-view"; @@ -15,6 +16,7 @@ interface InsideItem { title: string; description: string; summary: string; + href: string; } const insideItems: InsideItem[] = [ @@ -23,35 +25,32 @@ const insideItems: InsideItem[] = [ title: "Spec-Driven Development", description: "Replaces plan mode", summary: - "/spec plans features and bugfixes, gets your approval, implements each task with TDD, then verifies with automated code review. Loops back if any check fails.", + "/spec plans features end-to-end with TDD. /fix is the bugfix command — investigate, RED test, fix at the root cause, audit. Both honour the same approval and review toggles.", + href: "/docs/workflows/spec", }, { icon: GitBranch, title: "Context Engineering", description: "Keep your context window lean", summary: - "Curated rules for best practices, TDD, debugging, and verification. Language-specific coding standards for TypeScript, Python, and frontend. Concise and modular \u2014 only what\u2019s relevant loads into context.", + "Curated rules for best practices, TDD, debugging, and verification. Language- and architecture-specific standards cover Python, TypeScript, Go, frontend, and backend. 
Concise and modular \u2014 only what\u2019s relevant loads into context.", + href: "/docs/features/context-optimization", }, { icon: Terminal, title: "Quality Hooks & Testing", description: "Deterministic checks on every edit", summary: - "18 hooks across 7 lifecycle events. Auto-lint, format, and type-check every file edit. TDD enforcer warns when implementation is written without a failing test.", + "15 hooks across 7 lifecycle events. Auto-lint, format, and type-check every file edit. TDD enforcer warns when implementation is written without a failing test.", + href: "/docs/features/hooks", }, { icon: Plug2, - title: "MCP Servers", - description: "Pre-configured, no API keys", + title: "MCP & LSP Servers", + description: "Pre-configured, zero setup", summary: - "Library docs, web search, GitHub code search, persistent memory, web page fetching, and code knowledge graphs. Six servers installed and ready to use.", - }, - { - icon: Eye, - title: "Language Servers", - description: "Real-time diagnostics on every edit", - summary: - "Python, TypeScript, and Go \u2014 auto-installed and auto-configured. Hooks catch formatting issues; language servers provide type-level intelligence and real-time diagnostics.", + "Six MCP servers (docs, search, memory, code graphs) plus Python, TypeScript, and Go language servers \u2014 all auto-installed. Real-time diagnostics and type intelligence on every edit.", + href: "/docs/features/mcp-servers", }, { icon: Search, @@ -59,6 +58,7 @@ const insideItems: InsideItem[] = [ description: "Find code by intent, not keywords", summary: "Search your codebase by intent, not just keywords. AST-aware extraction pulls exactly what\u2019s needed. Call graph tracing maps blast radius before you change anything. 
Sub-300ms.", + href: "/docs/features/open-source-tools", }, { icon: DollarSign, @@ -66,6 +66,7 @@ const insideItems: InsideItem[] = [ description: "Right model, right task, visible spend", summary: "Smart model routing: Opus for planning, Sonnet for implementation. CLI proxy saves 60\u201390% on tool output tokens. Usage tracking in Console shows daily cost and trends.", + href: "/docs/features/model-routing", }, { icon: Sparkles, @@ -73,6 +74,15 @@ const insideItems: InsideItem[] = [ description: "Skills, rules, commands, agents", summary: "Create custom skills and rules with built-in generators. Share across machines via git, across teams via project repos. Seven extension types at four scopes \u2014 managed in Console.", + href: "/docs/features/extensions", + }, + { + icon: SlidersHorizontal, + title: "Customization", + description: "Modify what Pilot auto-installs", + summary: + "Tweak the built-in /spec workflow, adjust rules, add hooks and agents, change the configured MCP and LSP servers, override auto-applied Claude settings. Ship as a git repo for your team or a local directory for personal use. Upstream drift detected automatically.", + href: "/docs/features/customization", }, ]; @@ -118,12 +128,14 @@ const WhatsInside = () => { {insideItems.map((item, index) => { const Icon = item.icon; return ( -
    {/* Icon and Title */} diff --git a/docs/site/src/components/WorkflowSteps.tsx b/docs/site/src/components/WorkflowSteps.tsx index fca483a7..96fd6eb8 100644 --- a/docs/site/src/components/WorkflowSteps.tsx +++ b/docs/site/src/components/WorkflowSteps.tsx @@ -7,6 +7,7 @@ import { MessageSquare, Brain, Lightbulb, + Gauge, } from "lucide-react"; import { useInView } from "@/hooks/use-in-view"; @@ -61,13 +62,13 @@ const WorkflowSteps = () => {

    - Define what to build + Brainstorm what to build

    - Strategic conversation with optional research. Challenges - assumptions, explores trade-offs, and produces a PRD — then + Back-and-forth brainstorming for vague ideas: Claude pitches + directions, pressure-tests them, and converges on a PRD — then hands off to /spec.

    @@ -197,47 +198,72 @@ const WorkflowSteps = () => {

    All Commands

    - diff --git a/docs/site/src/components/feedback/AnnotationToolbar.tsx b/docs/site/src/components/feedback/AnnotationToolbar.tsx deleted file mode 100644 index d1107845..00000000 --- a/docs/site/src/components/feedback/AnnotationToolbar.tsx +++ /dev/null @@ -1,95 +0,0 @@ -import { useEffect, useRef, useState } from "react"; -import { Button } from "@/components/ui/button"; -import { Card } from "@/components/ui/card"; -import type { PendingSelection } from "@/lib/annotation/types"; - -interface AnnotationToolbarProps { - selection: PendingSelection; - onSubmit: (text: string) => void; - onDismiss: () => void; -} - -/** - * Floating popover that appears at the text selection position. - * Dismisses on Escape or click outside. - */ -export function AnnotationToolbar({ selection, onSubmit, onDismiss }: AnnotationToolbarProps) { - const popoverRef = useRef(null); - const textareaRef = useRef(null); - const [text, setText] = useState(""); - - useEffect(() => { - textareaRef.current?.focus(); - }, []); - - useEffect(() => { - const handleKey = (e: KeyboardEvent) => { - if (e.key === "Escape") onDismiss(); - }; - const handleMouseDown = (e: MouseEvent) => { - requestAnimationFrame(() => { - if (popoverRef.current && !popoverRef.current.contains(e.target as Node)) { - onDismiss(); - } - }); - }; - document.addEventListener("keydown", handleKey); - document.addEventListener("mousedown", handleMouseDown); - return () => { - document.removeEventListener("keydown", handleKey); - document.removeEventListener("mousedown", handleMouseDown); - }; - }, [onDismiss]); - - const handleSubmit = () => { - const trimmed = text.trim(); - if (!trimmed) return; - onSubmit(trimmed); - setText(""); - }; - - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - handleSubmit(); - } - }; - - const top = selection.rect.top - 108; - const left = selection.rect.left + selection.rect.width / 2; - - return ( -
    e.stopPropagation()} - > - -