diff --git a/.docs/design/toml-filter-dsl.md b/.docs/design/toml-filter-dsl.md new file mode 100644 index 0000000..bac8ef0 --- /dev/null +++ b/.docs/design/toml-filter-dsl.md @@ -0,0 +1,1138 @@ +# TOML Filter DSL Specification + +**Issue:** #59 +**Status:** Draft +**Date:** 2026-03-25 + +--- + +## 1. Purpose + +Allow users to define custom output filter rules in TOML format, extending +skim's built-in transformation modes with project-specific, version-controlled +filtering logic. + +Built-in modes (`structure`, `signatures`, `types`, `full`, `minimal`, `pseudo`) +cover common cases but cannot anticipate every project's needs. The TOML Filter +DSL enables: + +- Stripping debug/logging statements before sending code to an LLM +- Collapsing import blocks to save tokens +- Preserving public API surfaces while removing internal implementation +- Replacing verbose patterns with compact summaries +- Applying language-specific or mode-specific rules + +Filters compose with existing modes -- they run **after** the mode transformation, +providing a second pass of user-controlled refinement. + +--- + +## 2. File Location and Discovery + +### Project-level (recommended) + +``` +/.skim.toml +``` + +This file is already created by `skim init`. The `[filters]` section is optional +and coexists with any future configuration sections. + +### User-level (personal defaults) + +``` +~/.config/skim/filters.toml +``` + +User-level filters apply to all projects unless overridden by project-level rules +with the same `name`. + +### Precedence + +1. **Project-level** (`.skim.toml`) -- highest priority +2. **User-level** (`~/.config/skim/filters.toml`) -- lowest priority + +When both files define a rule with the same `name`, the project-level rule wins. +Rules from both files are merged into a single priority-ordered chain. + +--- + +## 3. 
Format + +### Minimal example + +```toml +[filters] + +[[filters.rules]] +name = "strip-debug-logs" +description = "Remove console.log and debug statements" +match = { pattern = "console\\.(log|debug|warn)\\(.*\\)", language = ["typescript", "javascript"] } +action = "remove" +priority = 10 +``` + +### Full example with all filter actions + +```toml +[filters] +# Optional metadata +version = 1 + +# Rule 1: Remove debug logging +[[filters.rules]] +name = "strip-debug-logs" +description = "Remove console.log and debug statements" +match = { pattern = "console\\.(log|debug|warn)\\(.*\\)", language = ["typescript", "javascript"] } +action = "remove" +priority = 10 + +# Rule 2: Collapse import blocks +[[filters.rules]] +name = "collapse-imports" +description = "Collapse import blocks to single summary line" +match = { node_type = "import_statement", language = ["typescript"] } +action = "collapse" +priority = 20 + +# Rule 3: Always preserve public exports +[[filters.rules]] +name = "keep-public-api" +description = "Always preserve public exports regardless of mode" +match = { node_type = "export_statement" } +action = "keep" +priority = 100 + +# Rule 4: Replace test boilerplate with summary +[[filters.rules]] +name = "summarize-test-setup" +description = "Replace beforeEach/afterEach blocks with summary comment" +match = { pattern = "(beforeEach|afterEach)\\s*\\(", language = ["typescript", "javascript"] } +action = { replace = "/* {name}: test lifecycle hook */" } +priority = 15 + +# Rule 5: Mode-specific rule (only in structure mode) +[[filters.rules]] +name = "strip-comments-in-structure" +description = "Remove all comments in structure mode for maximum compression" +match = { node_type = "comment", mode = ["structure"] } +action = "remove" +priority = 5 + +# Rule 6: Pattern with node_type combined +[[filters.rules]] +name = "strip-logging-calls" +description = "Remove logging function calls" +match = { node_type = "expression_statement", pattern = 
"logger\\.(info|debug|trace)\\(" } +action = "remove" +priority = 12 +``` + +--- + +## 4. Schema Reference + +### Top-level + +```toml +[filters] +version = 1 # Optional. Schema version for forward compatibility. + +[[filters.rules]] +# ... rule definitions +``` + +### Rule fields + +| Field | Type | Required | Description | +|---------------|---------------------|----------|--------------------------------------------------| +| `name` | `string` | Yes | Unique identifier for the rule | +| `description` | `string` | No | Human-readable description | +| `match` | `MatchCriteria` | Yes | Conditions that determine which code to match | +| `action` | `Action` | Yes | What to do with matched code | +| `priority` | `integer` | Yes | Execution order (higher = runs later, wins ties) | +| `enabled` | `boolean` | No | Default: `true`. Set `false` to disable without deleting | + +### MatchCriteria + +At least one of `pattern` or `node_type` must be specified. When both are +present, **both must match** (logical AND). + +| Field | Type | Required | Description | +|-------------|-------------------|----------|---------------------------------------------------| +| `pattern` | `string` (regex) | No* | Regex pattern matched against source text of node | +| `node_type` | `string` | No* | tree-sitter AST node type to match | +| `language` | `string[]` | No | Restrict to these languages. Default: all languages | +| `mode` | `string[]` | No | Restrict to these modes. Default: all modes | + +\* At least one of `pattern` or `node_type` is required. 
+ +**Language values:** `typescript`, `javascript`, `python`, `rust`, `go`, `java`, +`c`, `cpp`, `markdown`, `json`, `yaml`, `toml` + +**Mode values:** `structure`, `signatures`, `types`, `full`, `minimal`, `pseudo` + +### Pattern matching details + +- Patterns use Rust `regex` crate syntax (compatible with PCRE-like patterns) +- Patterns are matched against the **source text** of the matched node (or line + if no `node_type` is specified) +- Backslashes must be escaped in TOML: `\\d` for regex `\d` +- Patterns are case-sensitive by default. Use `(?i)` prefix for case-insensitive + +### Node type matching details + +- Node types correspond to tree-sitter grammar node names +- Common node types by language: + +| Language | Common node types | +|------------|--------------------------------------------------------------------------| +| TypeScript | `import_statement`, `export_statement`, `function_declaration`, `class_declaration`, `comment`, `expression_statement`, `type_alias_declaration` | +| Python | `import_statement`, `import_from_statement`, `function_definition`, `class_definition`, `comment`, `expression_statement`, `decorated_definition` | +| Rust | `use_declaration`, `function_item`, `struct_item`, `impl_item`, `trait_item`, `macro_definition`, `line_comment`, `block_comment` | +| Go | `import_declaration`, `function_declaration`, `method_declaration`, `type_declaration`, `comment` | +| Java | `import_declaration`, `class_declaration`, `method_declaration`, `interface_declaration`, `line_comment`, `block_comment` | +| C/C++ | `preproc_include`, `function_definition`, `struct_specifier`, `comment` | + +--- + +## 5. Filter Actions + +### `remove` + +Delete the matched node/line entirely from output. + +```toml +action = "remove" +``` + +**Example input:** +```typescript +import { readFile } from "fs"; +console.log("starting up"); +export function process(data: string): Result { /* ... 
*/ } +console.debug("debug info"); +``` + +**Rule:** +```toml +[[filters.rules]] +name = "strip-debug" +match = { pattern = "console\\.(log|debug)\\(" } +action = "remove" +priority = 10 +``` + +**Expected output:** +```typescript +import { readFile } from "fs"; +export function process(data: string): Result { /* ... */ } +``` + +### `collapse` + +Replace the matched node with a single-line summary showing the node type +and count. + +```toml +action = "collapse" +``` + +**Example input:** +```typescript +import { readFile } from "fs"; +import { writeFile } from "fs/promises"; +import { join, resolve } from "path"; +import { Config } from "./config"; +import { Logger } from "./logger"; + +export function main(): void { /* ... */ } +``` + +**Rule:** +```toml +[[filters.rules]] +name = "collapse-imports" +match = { node_type = "import_statement", language = ["typescript"] } +action = "collapse" +priority = 20 +``` + +**Expected output:** +```typescript +/* 5 import statements collapsed */ + +export function main(): void { /* ... */ } +``` + +Consecutive matched nodes are collapsed into a single summary line. Non-consecutive +matches each produce their own summary. + +### `keep` + +Force the matched node to be preserved in output, even if the current mode +would normally strip it. This is an override that prevents other rules and +mode transformations from removing the node. 
+ +```toml +action = "keep" +``` + +**Example input (structure mode would strip function bodies):** +```typescript +export function publicApi(data: string): Result { + return validate(data).map(transform); +} + +function internalHelper(x: number): number { + return x * 2; +} +``` + +**Rule:** +```toml +[[filters.rules]] +name = "keep-exports" +match = { node_type = "export_statement" } +action = "keep" +priority = 100 +``` + +**Expected output (in structure mode):** +```typescript +export function publicApi(data: string): Result { + return validate(data).map(transform); +} + +function internalHelper(x: number): number { /* ... */ } +``` + +The exported function retains its body because the `keep` rule overrides +structure mode's body-stripping behavior. + +### `replace` + +Replace the matched node with a custom string. The replacement string supports +template variables: + +| Variable | Expands to | +|----------------|-----------------------------------------------| +| `{name}` | The rule's `name` field | +| `{node_type}` | The tree-sitter node type of the matched node | +| `{match_text}` | First 60 characters of the matched source text| +| `{line}` | Line number of the matched node | + +```toml +action = { replace = "/* {name}: {node_type} at line {line} */" } +``` + +**Example input:** +```typescript +beforeEach(async () => { + db = await createTestDatabase(); + cache = new MockCache(); + logger = new TestLogger(); + service = new UserService(db, cache, logger); +}); +``` + +**Rule:** +```toml +[[filters.rules]] +name = "summarize-test-setup" +match = { pattern = "beforeEach\\s*\\(", language = ["typescript"] } +action = { replace = "/* {name}: test lifecycle hook */" } +priority = 15 +``` + +**Expected output:** +```typescript +/* summarize-test-setup: test lifecycle hook */ +``` + +--- + +## 6. Priority Chain + +### Execution order + +Rules execute in priority order, lowest first. 
Within the same priority level, +user-level rules execute before project-level rules (so the later-applied +project-level rule wins ties, consistent with rules 5 and 6 below). + +### Built-in rule priorities + +Built-in mode transformations (structure, signatures, etc.) have an implicit +priority of **0**. User rules with priority > 0 can override built-in behavior. + +| Priority range | Owner | Description | +|----------------|--------------|--------------------------------------| +| 0 | Built-in | Mode transformations (structure, etc.)| +| 1 - 49 | User | Low-priority refinements | +| 50 - 99 | User | Standard filtering rules | +| 100+ | User | High-priority overrides (`keep` rules)| + +### Conflict resolution + +When multiple rules match the same node: + +1. Rules are applied in priority order (lowest first) +2. `keep` at any priority prevents `remove` at lower priority +3. `replace` at higher priority overrides `replace` at lower priority +4. `remove` at higher priority overrides `collapse` at lower priority +5. If two rules have the same priority and conflict, the **project-level** + rule wins over the **user-level** rule +6. If two rules from the same file have the same priority and conflict, + the rule defined **later** in the file wins (last-writer-wins) + +### Conflict examples + +```toml +# Rule A: priority 10, action "remove" +# Rule B: priority 100, action "keep" +# Result: Node is KEPT (B wins by priority) + +# Rule C: priority 50, action "collapse" (user-level) +# Rule D: priority 50, action "remove" (project-level) +# Result: Node is REMOVED (D wins by source precedence) +``` + +--- + +## 7. Trust Model + +### Trusted sources + +Both configuration files are considered trusted: + +1. **`.skim.toml`** -- under version control, reviewed in PRs. Trusted by default. +2. **`~/.config/skim/filters.toml`** -- user's own machine, user-controlled. + Trusted by default. + +### Security considerations + +- **Regex complexity:** Patterns are compiled with a size limit to prevent + ReDoS attacks.
Patterns exceeding the limit are rejected at load time with + a clear error message. Default limit: 1 MB compiled regex size. +- **Rule count:** Maximum 100 rules per file (200 total across both files). + Prevents accidental performance degradation from excessive rules. +- **No code execution:** Filters are declarative only. No shell commands, + no scripting, no dynamic evaluation. The `replace` action supports only + the documented template variables. +- **No file system access:** Filters cannot read files, access environment + variables, or interact with the system beyond the transformation pipeline. + +### Untrusted input protection + +If a future feature allows loading filters from untrusted sources (e.g., +downloaded from a registry), the following safeguards must be added: + +- Explicit opt-in: `skim verify --trust <source>` +- Content-addressed integrity (SHA-256 hash pinning) +- Sandboxed regex execution with timeout + +These are **not implemented** in v1. Filters from `.skim.toml` and +`~/.config/skim/filters.toml` are trusted. + +--- + +## 8. `skim verify` Command + +### Purpose + +Validate TOML syntax, check for conflicting rules, and report the +precedence chain. Intended for CI pipelines and pre-commit hooks.
+ +### Usage + +```bash +# Validate project-level filters +skim verify + +# Validate a specific file +skim verify --file path/to/filters.toml + +# Verbose output showing full precedence chain +skim verify --verbose +``` + +### Validation checks + +| Check | Severity | Description | +|--------------------------|----------|-----------------------------------------------| +| TOML syntax | Error | File must be valid TOML | +| Schema conformance | Error | All required fields present, correct types | +| Unique rule names | Error | No duplicate `name` within a single file | +| Valid regex patterns | Error | All `pattern` values must compile | +| Valid node types | Warning | Node types checked against known grammar types| +| Valid language values | Error | Languages must be in supported set | +| Valid mode values | Error | Modes must be in supported set | +| Priority conflicts | Warning | Same-priority rules matching same criteria | +| Shadowed rules | Info | Project rules that shadow user-level rules | +| Rule count limit | Error | Exceeds 100 rules per file | +| Regex complexity | Error | Pattern exceeds compiled size limit | +| Dead rules | Warning | `enabled = false` rules | + +### Exit codes + +| Code | Meaning | +|------|--------------------------------------------------------| +| 0 | All checks pass (warnings printed to stderr) | +| 1 | One or more errors found | +| 2 | File not found or not readable | + +### Output format + +**Default (human-readable):** + +``` +Validating .skim.toml... 
+ + Rules: 6 (6 enabled, 0 disabled) + Errors: 0 + Warnings: 1 + + Priority chain: + 5 strip-comments-in-structure [structure] remove comment + 10 strip-debug-logs [ts, js] remove pattern + 12 strip-logging-calls [all] remove expression_statement + pattern + 15 summarize-test-setup [ts, js] replace pattern + 20 collapse-imports [ts] collapse import_statement + 100 keep-public-api [all] keep export_statement + + Warnings: + - Rule "strip-debug-logs" and "strip-logging-calls" may match + overlapping content at different priorities (10 vs 12) + + Result: PASS (1 warning) +``` + +**JSON output (`skim verify --json`):** + +```json +{ + "file": ".skim.toml", + "rules_total": 6, + "rules_enabled": 6, + "errors": [], + "warnings": [ + { + "type": "potential_overlap", + "rules": ["strip-debug-logs", "strip-logging-calls"], + "message": "May match overlapping content at different priorities (10 vs 12)" + } + ], + "priority_chain": [ + { "priority": 5, "name": "strip-comments-in-structure", "action": "remove" }, + { "priority": 10, "name": "strip-debug-logs", "action": "remove" }, + { "priority": 12, "name": "strip-logging-calls", "action": "remove" }, + { "priority": 15, "name": "summarize-test-setup", "action": "replace" }, + { "priority": 20, "name": "collapse-imports", "action": "collapse" }, + { "priority": 100, "name": "keep-public-api", "action": "keep" } + ], + "result": "pass" +} +``` + +--- + +## 9. Pipeline Integration + +### Where filters are applied + +Filters sit between the mode transformation and output emission in +skim's processing pipeline: + +``` +Source Code + | + v +Language Detection + | + v +tree-sitter Parse (AST) + | + v +Mode Transformation (structure/signatures/types/full/minimal/pseudo) + | <-- Built-in priority 0 + v ++-------------------------------+ +| TOML Filter DSL | <-- User priorities 1+ +| | +| 1. Load rules from files | +| 2. Filter by language/mode | +| 3. Sort by priority | +| 4. Walk AST post-transform | +| 5. 
Apply matching rules | +-------------------------------+ + | + v +Truncation (--max-lines, --last-lines, --tokens) + | + v +Token Counting (--show-stats) + | + v +Caching (write to ~/.cache/skim/) + | + v +Output (stdout) +``` + +### Integration with existing modes + +Filters see the **post-transformation** output, not the raw source. This means: + +- In `structure` mode, function bodies are already replaced with `/* ... */` + before filters run. A filter cannot match against the original body text. +- In `full` mode, filters see the complete source and can strip/collapse/keep + any part of it. +- In `types` mode, only type definitions survive the mode pass. Filters + can further refine which types to keep. + +### Integration with caching + +Cache keys must include a hash of the active filter rules to prevent stale +cache hits when rules change: + +``` +CacheKey { + path: PathBuf, + mtime: SystemTime, + mode: String, + filter_hash: Option<u64>, // NEW: hash of applicable filter rules +} +``` + +When no filters are defined, `filter_hash` is `None` and caching works +exactly as before (backward compatible). + +### Integration with token counting + +Filters may increase or decrease token count. The `--show-stats` output +should reflect the final post-filter token count: + +``` +Tokens: 1,234 -> 456 (63% reduction) + ^ ^ + | +-- After mode + filters + +----------- Original source +``` + +### Integration with multi-file processing + +Filters are loaded once at startup and shared across all files in a +multi-file/glob invocation. Per-file filtering uses the `language` and +`mode` fields to determine which rules apply to each file. + +--- + +## 10. Rust Types (Implementation Reference) + +These types are provided for implementors. They are NOT part of the public API +and may change during implementation. + +```rust +use std::path::{Path, PathBuf}; + +/// A single filter rule parsed from TOML.
+#[derive(Debug, Clone)] +pub struct FilterRule { + pub name: String, + pub description: Option<String>, + pub match_criteria: MatchCriteria, + pub action: FilterAction, + pub priority: i32, + pub enabled: bool, + pub source: FilterSource, +} + +/// Where a rule was loaded from (for conflict resolution). +#[derive(Debug, Clone, PartialEq)] +pub enum FilterSource { + Project(PathBuf), + User(PathBuf), +} + +/// Conditions that determine which AST nodes to match. +#[derive(Debug, Clone)] +pub struct MatchCriteria { + /// Regex pattern matched against source text. + pub pattern: Option<String>, + /// tree-sitter node type name. + pub node_type: Option<String>, + /// Restrict to specific languages. None = all languages. + pub languages: Option<Vec<String>>, + /// Restrict to specific modes. None = all modes. + pub modes: Option<Vec<String>>, +} + +/// Action to take on matched nodes. +#[derive(Debug, Clone)] +pub enum FilterAction { + /// Delete the node from output. + Remove, + /// Collapse consecutive matched nodes into a summary. + Collapse, + /// Force-keep the node (override mode stripping). + Keep, + /// Replace with a template string. + Replace(String), +} + +/// Loaded and validated filter configuration. +#[derive(Debug)] +pub struct FilterConfig { + pub rules: Vec<FilterRule>, + /// Precomputed hash for cache key integration. + pub hash: u64, +} + +impl FilterConfig { + /// Load filters from project and user paths, merge and validate. + pub fn load( + project_path: Option<&Path>, + user_path: Option<&Path>, + ) -> Result<Self, FilterError> { + // 1. Parse TOML from both files + // 2. Validate schema + // 3. Merge with project-level precedence + // 4. Sort by priority + // 5. Compute hash + todo!() + } + + /// Return only rules applicable to the given language and mode.
+ pub fn rules_for(&self, language: &str, mode: &str) -> Vec<&FilterRule> { + self.rules + .iter() + .filter(|r| r.enabled) + .filter(|r| match &r.match_criteria.languages { + Some(langs) => langs.iter().any(|l| l == language), + None => true, + }) + .filter(|r| match &r.match_criteria.modes { + Some(modes) => modes.iter().any(|m| m == mode), + None => true, + }) + .collect() + } +} + +/// Errors from filter loading and validation. +#[derive(Debug, thiserror::Error)] +pub enum FilterError { + #[error("TOML parse error in {path}: {source}")] + TomlParse { + path: PathBuf, + source: toml::de::Error, + }, + #[error("invalid regex in rule '{rule}': {source}")] + InvalidRegex { + rule: String, + source: regex::Error, + }, + #[error("rule '{rule}' missing required field: {field}")] + MissingField { + rule: String, + field: String, + }, + #[error("duplicate rule name '{name}' in {path}")] + DuplicateName { + name: String, + path: PathBuf, + }, + #[error("too many rules in {path}: {count} (maximum: 100)")] + TooManyRules { + path: PathBuf, + count: usize, + }, + #[error("regex too complex in rule '{rule}': compiled size exceeds limit")] + RegexTooComplex { + rule: String, + }, + #[error("unknown language '{language}' in rule '{rule}'")] + UnknownLanguage { + language: String, + rule: String, + }, + #[error("unknown mode '{mode}' in rule '{rule}'")] + UnknownMode { + mode: String, + rule: String, + }, +} +``` + +--- + +## 11. Inline Test Examples + +### Test: `remove` action strips matching lines + +**Input** (`test.ts`, mode: `structure`): +```typescript +import { Result } from "./types"; +console.log("booting"); +export function handle(req: Request): Result { /* ... */ } +console.debug("req:", req); +export function health(): string { /* ... 
*/ } +``` + +**Rules:** +```toml +[[filters.rules]] +name = "strip-console" +match = { pattern = "console\\.(log|debug)\\(", language = ["typescript"] } +action = "remove" +priority = 10 +``` + +**Expected output:** +```typescript +import { Result } from "./types"; +export function handle(req: Request): Result { /* ... */ } +export function health(): string { /* ... */ } +``` + +--- + +### Test: `collapse` action merges consecutive imports + +**Input** (`app.ts`, mode: `full`): +```typescript +import { readFile } from "fs"; +import { join } from "path"; +import { Config } from "./config"; + +export class App { + constructor(private config: Config) {} +} +``` + +**Rules:** +```toml +[[filters.rules]] +name = "collapse-imports" +match = { node_type = "import_statement", language = ["typescript"] } +action = "collapse" +priority = 20 +``` + +**Expected output:** +```typescript +/* 3 import statements collapsed */ + +export class App { + constructor(private config: Config) {} +} +``` + +--- + +### Test: `keep` action overrides mode stripping + +**Input** (`lib.rs`, mode: `signatures`): +```rust +pub fn public_api(data: &str) -> Result { + validate(data)?; + transform(data) +} + +fn internal_helper(x: i32) -> i32 { + x * 2 +} +``` + +**Rules:** +```toml +[[filters.rules]] +name = "keep-public" +match = { pattern = "^pub\\s+fn", language = ["rust"] } +action = "keep" +priority = 100 +``` + +**Expected output:** +```rust +pub fn public_api(data: &str) -> Result { + validate(data)?; + transform(data) +} + +fn internal_helper(x: i32) -> i32 { /* ... */ } +``` + +The `keep` rule preserves the full body of `public_api` even though +signatures mode would normally strip it. 
+ +--- + +### Test: `replace` action with template variables + +**Input** (`test.spec.ts`, mode: `structure`): +```typescript +describe("UserService", () => { + beforeEach(async () => { + db = await createTestDb(); + cache = new MockCache(); + logger = new TestLogger(); + service = new UserService(db, cache, logger); + }); + + it("creates user", () => { /* ... */ }); +}); +``` + +**Rules:** +```toml +[[filters.rules]] +name = "summarize-setup" +match = { pattern = "beforeEach\\s*\\(", language = ["typescript"] } +action = { replace = "/* {name}: test setup ({node_type}) */" } +priority = 15 +``` + +**Expected output:** +```typescript +describe("UserService", () => { + /* summarize-setup: test setup (expression_statement) */ + + it("creates user", () => { /* ... */ }); +}); +``` + +--- + +### Test: mode-restricted rule only fires in specified mode + +**Input** (`util.py`, mode: `full`): +```python +# Helper utilities +def add(a: int, b: int) -> int: + return a + b +``` + +**Rules:** +```toml +[[filters.rules]] +name = "strip-comments-structure" +match = { node_type = "comment", mode = ["structure"] } +action = "remove" +priority = 5 +``` + +**Expected output (mode: `full`):** +```python +# Helper utilities +def add(a: int, b: int) -> int: + return a + b +``` + +The rule does NOT fire because the current mode is `full`, not `structure`. +The same input in `structure` mode would have the comment removed. 
+ +--- + +### Test: combined `node_type` + `pattern` match (AND logic) + +**Input** (`server.ts`, mode: `structure`): +```typescript +app.get("/health", healthHandler); +app.post("/users", createUser); +logger.info("server started"); +logger.debug("debug mode"); +``` + +**Rules:** +```toml +[[filters.rules]] +name = "strip-logger-calls" +match = { node_type = "expression_statement", pattern = "logger\\.(info|debug)\\(" } +action = "remove" +priority = 12 +``` + +**Expected output:** +```typescript +app.get("/health", healthHandler); +app.post("/users", createUser); +``` + +Both `node_type` AND `pattern` must match. The `app.get` and `app.post` lines +are `expression_statement` nodes but don't match the `logger` pattern, so they +are preserved. + +--- + +### Test: priority conflict resolution + +**Input** (`api.ts`, mode: `structure`): +```typescript +console.log("request received"); +export function handler(): void { /* ... */ } +``` + +**Rules:** +```toml +# Lower priority: remove all expression statements +[[filters.rules]] +name = "strip-expressions" +match = { node_type = "expression_statement" } +action = "remove" +priority = 10 + +# Higher priority: keep console.log for debugging +[[filters.rules]] +name = "keep-console" +match = { pattern = "console\\.log\\(" } +action = "keep" +priority = 50 +``` + +**Expected output:** +```typescript +console.log("request received"); +export function handler(): void { /* ... */ } +``` + +The `keep` rule at priority 50 overrides the `remove` rule at priority 10 +for the `console.log` line. + +--- + +## 12. 
Error Messages + +### TOML parse error + +``` +error: invalid TOML in .skim.toml + --> line 5, column 12 + | + | match = { pattern = "unclosed + | ^^^^^^^ + = expected closing quote + +hint: validate your TOML at https://www.toml-lint.com/ +``` + +### Invalid regex + +``` +error: invalid regex in rule 'strip-debug' + pattern: console\.(log|debug\( + ^ + = unclosed group + +hint: escape special characters with double backslash in TOML (e.g., \\() +``` + +### Missing required field + +``` +error: rule 'my-rule' in .skim.toml is missing required field 'action' + + [[filters.rules]] + name = "my-rule" + match = { pattern = "TODO" } + # action = ??? <-- required + +hint: action must be one of: "remove", "collapse", "keep", or { replace = "..." } +``` + +### Duplicate rule name + +``` +error: duplicate rule name 'strip-debug' in .skim.toml + first definition at line 8 + duplicate at line 22 + +hint: rename one of the rules to make names unique +``` + +--- + +## 13. CLI Integration + +### Flags + +```bash +# Explicitly specify filter file (overrides auto-discovery) +skim file.ts --filters path/to/custom-filters.toml + +# Disable all filters (even if .skim.toml exists) +skim file.ts --no-filters + +# Show which filters matched (debug output to stderr) +skim file.ts --debug-filters +``` + +### Environment variables + +| Variable | Description | +|---------------------|--------------------------------------------------| +| `SKIM_FILTERS_FILE` | Override filter file path (takes precedence over auto-discovery) | +| `SKIM_NO_FILTERS` | Set to `1`/`true`/`yes` to disable all filters | + +--- + +## 14. Future Considerations + +### Not in v1 (documented for future reference) + +1. **Filter registry/sharing:** A community registry of filter presets + (e.g., `skim filters add react-best-practices`). Requires trust model + extensions (Section 7). + +2. **Conditional actions:** Rules with `if`/`else` logic based on sibling + nodes or parent context. Increases complexity significantly. 
+ +3. **Filter statistics:** `skim stats --filters` showing which rules fired + most often, token savings per rule. Requires analytics pipeline + integration. + +4. **Live preview:** `skim verify --preview file.ts` showing the effect of + filters on a specific file. Useful for iterating on rule definitions. + +5. **Filter inheritance:** `.skim.toml` in subdirectories inheriting from + parent directories. Adds resolution complexity. + +6. **Negative patterns:** `match = { not_pattern = "..." }` for exclusion + logic. Can be approximated with `keep` rules at higher priority. + +--- + +## 15. Design Decisions + +### Why TOML (not YAML or JSON)? + +1. **Already in use:** `.skim.toml` exists from `skim init`. No new file format. +2. **Comment support:** TOML supports inline comments; JSON does not. +3. **Readability:** TOML is more human-friendly than JSON for configuration. +4. **Rust ecosystem:** TOML is the standard configuration format in Rust projects. + The `toml` crate is already a dependency. + +### Why post-transform filtering (not pre-transform)? + +Filters run after mode transformation because: + +1. **Composability:** Users can combine any mode with any filter set. +2. **Predictability:** The mode determines the baseline; filters refine it. +3. **Performance:** Filtering a smaller post-transform AST is faster than + filtering the full source. +4. **Simplicity:** Pre-transform filtering would require two AST passes and + complex interaction semantics with mode transformations. + +### Why priority numbers (not ordered lists)? + +1. **Mergeability:** Two files can define rules with interleaved priorities + without knowing about each other. +2. **Overridability:** Project rules can slot between user-level rules. +3. **Explicitness:** The priority number makes conflict resolution visible + and debuggable. + +### Why AND logic for combined match criteria? + +When both `pattern` and `node_type` are specified, both must match. 
This +provides precision: match only `expression_statement` nodes that contain +a specific pattern, not all nodes matching either condition. OR logic can +be achieved by defining two separate rules. diff --git a/README.md b/README.md index edfeb42..2c1f83a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# Skim: The Fastest, Most Comprehensive Context Optimization Tool for AI Coding Agents +# Skim: The Most Intelligent Context Optimization Engine for Coding Agents > **Code skimming. Command rewriting. Test, build, and git output compression. Token budget cascading.** 12 languages. 14ms for 3,000 lines. Built in Rust. -Other tools skim code. Skim optimizes everything your AI agent touches: code, test output, build errors, git diffs, and raw commands. 14ms for 3,000 lines. 48x faster on cache hits. Nothing else comes close. +Other tools filter terminal noise. Skim understands your code. It parses ASTs across 12 languages, strips implementation while preserving architecture, then optimizes every other type of context your agent consumes: test output, build errors, git diffs, and raw commands. 14ms for 3,000 lines. 48x faster on cache hits. [![Website](https://img.shields.io/badge/Website-skim-e87040)](https://dean0x.github.io/x/skim/) [![CI](https://github.com/dean0x/skim/actions/workflows/ci.yml/badge.svg)](https://github.com/dean0x/skim/actions/workflows/ci.yml) @@ -13,7 +13,7 @@ Other tools skim code. Skim optimizes everything your AI agent touches: code, te ## Why Skim? -**Context capacity is not the bottleneck. Attention is.** Every token you send to an LLM dilutes its focus. Research consistently shows attention dilution in long contexts -- models lose track of critical details even within their window. More tokens means higher latency, degraded recall, and weaker reasoning. Past a threshold, adding context makes outputs worse. 
While other tools stop at code skimming, Skim optimizes the full spectrum of AI agent context: code, test output, build errors, git diffs, and commands. Faster, broader, and smarter than anything else available. +**Context capacity is not the bottleneck. Attention is.** Every token you send to an LLM dilutes its focus. Research consistently shows attention dilution in long contexts -- models lose track of critical details even within their window. More tokens means higher latency, degraded recall, and weaker reasoning. Past a threshold, adding context makes outputs worse. While other tools stop at filtering command output, Skim parses your actual code structure and optimizes the full spectrum of agent context: code, test output, build errors, git diffs, and commands. Deeper, broader, and smarter than anything else available. Take a typical 80-file TypeScript project: 63,000 tokens. That contains maybe 5,000 tokens of actual signal. The rest is implementation noise the model doesn't need for architectural reasoning. @@ -603,8 +603,8 @@ Comprehensive guides for all aspects of Skim: | Tool | Role | What It Does | |------|------|-------------| -| **Skim** | Context Optimization | Compresses code, test output, build output, and git output for optimal LLM reasoning | -| **[DevFlow](https://github.com/dean0x/devflow)** | Quality Orchestration | 18 parallel reviewers, working memory, self-learning, production-grade lifecycle workflows | +| **Skim** | Context Optimization | Code-aware AST parsing across 12 languages, command rewriting, test/build/git output compression | +| **[DevFlow](https://github.com/dean0x/devflow)** | Quality Orchestration | 18 parallel reviewers, working memory, self-learning, composable plugin system | | **[Backbeat](https://github.com/dean0x/backbeat)** | Agent Orchestration | Orchestration at scale. Karpathy optimization loops, multi-agent pipelines, DAG dependencies, autoscaling | Skim optimizes every byte of context. 
DevFlow enforces production-grade quality. Backbeat scales execution across agents. No other stack covers all three. diff --git a/crates/rskim-core/Cargo.toml b/crates/rskim-core/Cargo.toml index baab45b..f0e0d00 100644 --- a/crates/rskim-core/Cargo.toml +++ b/crates/rskim-core/Cargo.toml @@ -4,7 +4,7 @@ version = "1.0.0" edition = "2021" authors = ["Skim Contributors"] license = "MIT" -description = "Core library for the fastest, most comprehensive context optimization tool for AI coding agents" +description = "Core library for the most intelligent context optimization engine for coding agents" repository = "https://github.com/dean0x/skim" readme = "README.md" keywords = ["ast", "code-analysis", "tree-sitter", "llm"] diff --git a/crates/rskim/Cargo.toml b/crates/rskim/Cargo.toml index 81bca3b..a6de870 100644 --- a/crates/rskim/Cargo.toml +++ b/crates/rskim/Cargo.toml @@ -4,7 +4,7 @@ version = "1.0.0" edition = "2021" authors = ["Skim Contributors"] license = "MIT" -description = "The fastest, most comprehensive context optimization tool for AI coding agents. Code skimming, command rewriting, output compression." +description = "The most intelligent context optimization engine for coding agents. Code-aware AST parsing, command rewriting, output compression." repository = "https://github.com/dean0x/skim" readme = "README.md" diff --git a/crates/rskim/src/cmd/agents/detection.rs b/crates/rskim/src/cmd/agents/detection.rs new file mode 100644 index 0000000..38898ac --- /dev/null +++ b/crates/rskim/src/cmd/agents/detection.rs @@ -0,0 +1,550 @@ +//! Agent detection logic for the `skim agents` subcommand. + +use std::path::{Path, PathBuf}; + +use crate::cmd::init::MAX_SETTINGS_SIZE; +use crate::cmd::session::AgentKind; + +use super::types::{AgentStatus, HookStatus, RulesInfo, SessionInfo}; +use super::util::{count_files_in_dir, count_files_recursive, dir_size_human, tilde_path}; + +/// Detect all supported agents and return their status. 
+pub(super) fn detect_all_agents() -> Vec { + let home = dirs::home_dir(); + AgentKind::all_supported() + .iter() + .copied() + .map(|kind| detect_agent(kind, home.as_deref())) + .collect() +} + +/// Detect a single agent's status. +fn detect_agent(kind: AgentKind, home: Option<&Path>) -> AgentStatus { + match kind { + AgentKind::ClaudeCode => detect_claude_code(home), + AgentKind::Cursor => detect_cursor(home), + AgentKind::CodexCli => detect_codex_cli(home), + AgentKind::GeminiCli => detect_gemini_cli(home), + AgentKind::CopilotCli => detect_copilot_cli(), + AgentKind::OpenCode => detect_opencode(), + } +} + +fn detect_claude_code(home: Option<&Path>) -> AgentStatus { + let projects_dir = std::env::var("SKIM_PROJECTS_DIR") + .ok() + .map(PathBuf::from) + .or_else(|| home.map(|h| AgentKind::ClaudeCode.config_dir(h).join("projects"))); + + let detected = projects_dir.as_ref().is_some_and(|p| p.is_dir()); + + let sessions = if detected { + projects_dir.as_ref().map(|p| { + let count = count_files_recursive(p, "jsonl"); + SessionInfo { + path: tilde_path(p), + detail: format!("{count} files"), + } + }) + } else { + None + }; + + let config_dir = home.map(|h| AgentKind::ClaudeCode.config_dir(h)); + let hooks = detect_pretooluse_hook(config_dir.as_deref()); + + let rules_dir = AgentKind::ClaudeCode.project_dir().join("rules"); + let rules = Some(RulesInfo { + path: format!("{}/", rules_dir.display()), + exists: rules_dir.is_dir(), + }); + + AgentStatus { + kind: AgentKind::ClaudeCode, + detected, + sessions, + hooks, + rules, + } +} + +fn detect_cursor(home: Option<&Path>) -> AgentStatus { + // config_dir() handles macOS vs Linux detection internally + let state_path = home.and_then(|h| { + let path = AgentKind::Cursor.config_dir(h); + if path.is_dir() { + Some(path) + } else { + None + } + }); + + let detected = state_path.is_some(); + + let sessions = state_path.as_ref().map(|p| { + let size = dir_size_human(p); + SessionInfo { + path: tilde_path(p), + detail: size, 
+ } + }); + + let hooks = detect_pretooluse_hook(state_path.as_deref()); + + let rules_dir = AgentKind::Cursor.project_dir().join("rules"); + let rules = Some(RulesInfo { + path: format!("{}/", rules_dir.display()), + exists: rules_dir.is_dir(), + }); + + AgentStatus { + kind: AgentKind::Cursor, + detected, + sessions, + hooks, + rules, + } +} + +fn detect_codex_cli(home: Option<&Path>) -> AgentStatus { + let codex_dir = home.map(|h| AgentKind::CodexCli.config_dir(h)); + let detected = codex_dir.as_ref().is_some_and(|p| p.is_dir()); + + let sessions = if detected { + codex_dir.as_ref().and_then(|p| { + let sessions_dir = p.join("sessions"); + if sessions_dir.is_dir() { + let count = count_files_in_dir(&sessions_dir); + Some(SessionInfo { + path: tilde_path(&sessions_dir), + detail: format!("{count} files"), + }) + } else { + None + } + }) + } else { + None + }; + + // Codex CLI has experimental hook support + let hooks = HookStatus::NotSupported { + note: "experimental hooks only", + }; + + let rules = codex_dir.as_ref().map(|p| { + let instructions_dir = p.join("instructions"); + RulesInfo { + path: tilde_path(&instructions_dir), + exists: instructions_dir.is_dir(), + } + }); + + AgentStatus { + kind: AgentKind::CodexCli, + detected, + sessions, + hooks, + rules, + } +} + +fn detect_gemini_cli(home: Option<&Path>) -> AgentStatus { + let gemini_dir = home.map(|h| AgentKind::GeminiCli.config_dir(h)); + let detected = gemini_dir.as_ref().is_some_and(|p| p.is_dir()); + + let sessions = None; // Gemini CLI doesn't persist session files locally + + // Gemini CLI supports BeforeTool/AfterTool hooks + let hooks = if detected { + let has_hook = gemini_dir + .as_ref() + .and_then(|p| read_settings_guarded(&p.join("settings.json"))) + .is_some_and(|v| has_skim_hook_in_settings(&v)); + if has_hook { + HookStatus::Installed { + version: None, + integrity: "ok", + } + } else { + HookStatus::NotInstalled + } + } else { + HookStatus::NotInstalled + }; + + let rules = 
gemini_dir.as_ref().map(|p| { + let settings = p.join("settings.json"); + RulesInfo { + path: tilde_path(&settings), + exists: settings.is_file(), + } + }); + + AgentStatus { + kind: AgentKind::GeminiCli, + detected, + sessions, + hooks, + rules, + } +} + +/// Maximum number of directory entries to scan in `detect_copilot_cli` +/// to prevent unbounded I/O on adversarial `.github/hooks/` directories. +const MAX_COPILOT_HOOK_ENTRIES: usize = 50; + +fn detect_copilot_cli() -> AgentStatus { + // Copilot CLI uses .github/hooks/ for hook configuration + let hooks_dir = AgentKind::CopilotCli.project_dir().join("hooks"); + let detected = hooks_dir.is_dir(); + + let sessions = None; // Copilot CLI sessions are cloud-managed + + let hooks = if detected { + let has_skim_hook = std::fs::read_dir(hooks_dir).ok().is_some_and(|entries| { + entries.flatten().take(MAX_COPILOT_HOOK_ENTRIES).any(|e| { + let path = e.path(); + path.extension().is_some_and(|ext| ext == "json") + && std::fs::metadata(&path) + .ok() + .is_some_and(|m| m.len() <= MAX_SETTINGS_SIZE) + && std::fs::read_to_string(&path) + .ok() + .is_some_and(|c| c.contains("skim")) + }) + }); + if has_skim_hook { + HookStatus::Installed { + version: None, + integrity: "ok", + } + } else { + HookStatus::NotInstalled + } + } else { + HookStatus::NotInstalled + }; + + let rules = None; // Copilot uses .github/ conventions, not a separate rules dir + + AgentStatus { + kind: AgentKind::CopilotCli, + detected, + sessions, + hooks, + rules, + } +} + +fn detect_opencode() -> AgentStatus { + // OpenCode uses .opencode/ directory in project root + let opencode_dir = std::env::var("SKIM_OPENCODE_DIR") + .ok() + .map(PathBuf::from) + .unwrap_or_else(|| AgentKind::OpenCode.project_dir()); + let detected = opencode_dir.is_dir(); + + let sessions = if detected { + let count = count_files_in_dir(&opencode_dir); + Some(SessionInfo { + path: tilde_path(&opencode_dir), + detail: format!("{count} files"), + }) + } else { + None + }; + + let 
hooks = HookStatus::NotSupported { + note: "TypeScript plugin model", + }; + + let rules = None; // OpenCode uses AGENTS.md, not a rules directory + + AgentStatus { + kind: AgentKind::OpenCode, + detected, + sessions, + hooks, + rules, + } +} + +/// Read and parse a JSON settings file with a size guard. +/// +/// Returns `None` if the file is missing, too large (> [`MAX_SETTINGS_SIZE`]), +/// or not valid JSON. +fn read_settings_guarded(path: &Path) -> Option { + let meta = std::fs::metadata(path).ok()?; + if meta.len() > MAX_SETTINGS_SIZE { + return None; + } + let contents = std::fs::read_to_string(path).ok()?; + serde_json::from_str(&contents).ok() +} + +/// Check whether a Gemini CLI settings object contains any hook whose +/// command references "skim". +fn has_skim_hook_in_settings(settings: &serde_json::Value) -> bool { + let hooks = match settings.get("hooks").and_then(|v| v.as_object()) { + Some(h) => h, + None => return false, + }; + hooks.values().any(|arr| { + arr.as_array().is_some_and(|entries| { + entries.iter().any(|e| { + e.get("command") + .and_then(|c| c.as_str()) + .is_some_and(|cmd| cmd.contains("skim")) + }) + }) + }) +} + +/// Detect skim hook via the PreToolUse + skim-rewrite.sh pattern. +/// +/// Shared by Claude Code and Cursor, which both use the same hook mechanism. 
+fn detect_pretooluse_hook(config_dir: Option<&Path>) -> HookStatus { + let Some(config_dir) = config_dir else { + return HookStatus::NotInstalled; + }; + + let settings_path = config_dir.join("settings.json"); + + let json = match read_settings_guarded(&settings_path) { + Some(v) => v, + None => return HookStatus::NotInstalled, + }; + + // Check if hooks.PreToolUse contains a skim-rewrite entry + let has_hook = json + .get("hooks") + .and_then(|h| h.get("PreToolUse")) + .and_then(|ptu| ptu.as_array()) + .is_some_and(|entries| entries.iter().any(crate::cmd::init::has_skim_hook_entry)); + + if !has_hook { + return HookStatus::NotInstalled; + } + + // Try to extract version from hook script + let hook_script = config_dir.join("hooks").join("skim-rewrite.sh"); + let version = std::fs::read_to_string(&hook_script) + .ok() + .and_then(|contents| { + contents.lines().find_map(|line| { + line.strip_prefix("# skim-hook v") + .or_else(|| { + line.strip_prefix("export SKIM_HOOK_VERSION=\"") + .and_then(|s| s.strip_suffix('"')) + }) + .map(|s| s.to_string()) + }) + }); + + // Check integrity using SHA-256 verification + let integrity = if !hook_script.is_file() { + "missing" + } else { + match crate::cmd::integrity::verify_script_integrity( + config_dir, + "claude-code", + &hook_script, + ) { + Ok(true) => "ok", + Ok(false) => "tampered", + Err(_) => "unknown", + } + }; + + HookStatus::Installed { version, integrity } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_detect_all_agents_returns_all_kinds() { + let agents = detect_all_agents(); + assert_eq!(agents.len(), AgentKind::all_supported().len()); + for kind in AgentKind::all_supported() { + assert!( + agents.iter().any(|a| a.kind == *kind), + "missing agent kind: {:?}", + kind + ); + } + } + + #[test] + fn test_detect_pretooluse_hook_integrity_ok() { + let dir = tempfile::TempDir::new().unwrap(); + let config = dir.path(); + let hooks_dir = config.join("hooks"); + 
std::fs::create_dir_all(&hooks_dir).unwrap(); + + let settings = serde_json::json!({ + "hooks": { + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{"type": "command", "command": hooks_dir.join("skim-rewrite.sh").to_str().unwrap()}] + }] + } + }); + std::fs::write( + config.join("settings.json"), + serde_json::to_string_pretty(&settings).unwrap(), + ) + .unwrap(); + + let script_path = hooks_dir.join("skim-rewrite.sh"); + std::fs::write( + &script_path, + "#!/usr/bin/env bash\n# skim-hook v1.0.0\nexec skim rewrite --hook\n", + ) + .unwrap(); + let hash = crate::cmd::integrity::compute_file_hash(&script_path).unwrap(); + crate::cmd::integrity::write_hash_manifest(config, "claude-code", "skim-rewrite.sh", &hash) + .unwrap(); + + let status = detect_pretooluse_hook(Some(config)); + match status { + HookStatus::Installed { integrity, .. } => { + assert_eq!( + integrity, "ok", + "integrity should be 'ok' for valid script+hash" + ); + } + other => panic!("expected HookStatus::Installed, got: {other:?}"), + } + } + + #[test] + fn test_detect_pretooluse_hook_integrity_tampered() { + let dir = tempfile::TempDir::new().unwrap(); + let config = dir.path(); + let hooks_dir = config.join("hooks"); + std::fs::create_dir_all(&hooks_dir).unwrap(); + + let settings = serde_json::json!({ + "hooks": { + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{"type": "command", "command": hooks_dir.join("skim-rewrite.sh").to_str().unwrap()}] + }] + } + }); + std::fs::write( + config.join("settings.json"), + serde_json::to_string_pretty(&settings).unwrap(), + ) + .unwrap(); + + let script_path = hooks_dir.join("skim-rewrite.sh"); + std::fs::write( + &script_path, + "#!/usr/bin/env bash\n# skim-hook v1.0.0\nexec skim rewrite --hook\n", + ) + .unwrap(); + let hash = crate::cmd::integrity::compute_file_hash(&script_path).unwrap(); + crate::cmd::integrity::write_hash_manifest(config, "claude-code", "skim-rewrite.sh", &hash) + .unwrap(); + + // Tamper with the script + 
std::fs::write(&script_path, "#!/usr/bin/env bash\necho HACKED\n").unwrap(); + + let status = detect_pretooluse_hook(Some(config)); + match status { + HookStatus::Installed { integrity, .. } => { + assert_eq!( + integrity, "tampered", + "integrity should be 'tampered' for modified script" + ); + } + other => panic!("expected HookStatus::Installed, got: {other:?}"), + } + } + + #[test] + fn test_detect_pretooluse_hook_integrity_missing_script() { + let dir = tempfile::TempDir::new().unwrap(); + let config = dir.path(); + let hooks_dir = config.join("hooks"); + std::fs::create_dir_all(&hooks_dir).unwrap(); + + let settings = serde_json::json!({ + "hooks": { + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{"type": "command", "command": hooks_dir.join("skim-rewrite.sh").to_str().unwrap()}] + }] + } + }); + std::fs::write( + config.join("settings.json"), + serde_json::to_string_pretty(&settings).unwrap(), + ) + .unwrap(); + + let status = detect_pretooluse_hook(Some(config)); + match status { + HookStatus::Installed { integrity, .. 
} => { + assert_eq!( + integrity, "missing", + "integrity should be 'missing' for absent script" + ); + } + other => panic!("expected HookStatus::Installed, got: {other:?}"), + } + } + + #[test] + fn test_has_skim_hook_in_settings_true() { + let settings = serde_json::json!({ + "hooks": { + "BeforeTool": [{ + "command": "/usr/local/bin/skim rewrite --hook" + }] + } + }); + assert!(has_skim_hook_in_settings(&settings)); + } + + #[test] + fn test_has_skim_hook_in_settings_false() { + let settings = serde_json::json!({ + "hooks": { + "BeforeTool": [{ + "command": "/usr/local/bin/other-tool" + }] + } + }); + assert!(!has_skim_hook_in_settings(&settings)); + } + + #[test] + fn test_has_skim_hook_in_settings_no_hooks() { + let settings = serde_json::json!({ "theme": "dark" }); + assert!(!has_skim_hook_in_settings(&settings)); + } + + #[test] + fn test_read_settings_guarded_rejects_oversized() { + let dir = tempfile::TempDir::new().unwrap(); + let path = dir.path().join("big.json"); + let data = vec![b' '; (MAX_SETTINGS_SIZE as usize) + 1]; + std::fs::write(&path, data).unwrap(); + assert!(read_settings_guarded(&path).is_none()); + } + + #[test] + fn test_read_settings_guarded_valid() { + let dir = tempfile::TempDir::new().unwrap(); + let path = dir.path().join("ok.json"); + std::fs::write(&path, r#"{"key":"value"}"#).unwrap(); + let v = read_settings_guarded(&path); + assert!(v.is_some()); + assert_eq!(v.unwrap().get("key").unwrap().as_str().unwrap(), "value"); + } +} diff --git a/crates/rskim/src/cmd/agents/formatting.rs b/crates/rskim/src/cmd/agents/formatting.rs new file mode 100644 index 0000000..540b5a6 --- /dev/null +++ b/crates/rskim/src/cmd/agents/formatting.rs @@ -0,0 +1,119 @@ +//! Output formatting for the `skim agents` subcommand. 
+ +use super::types::{AgentStatus, HookStatus}; + +pub(super) fn print_text(agents: &[AgentStatus]) { + println!("Detected agents:"); + for agent in agents { + println!(); + if agent.detected { + println!(" {} detected", agent.kind.display_name()); + } else { + println!(" {} not detected", agent.kind.display_name()); + continue; + } + + // Sessions + if let Some(ref sessions) = agent.sessions { + println!( + " {:width$}sessions: {} ({})", + "", + sessions.path, + sessions.detail, + width = agent.kind.display_name().len() + 3, + ); + } + + // Hooks + let hook_str = match &agent.hooks { + HookStatus::Installed { version, integrity } => { + let ver = version + .as_deref() + .map(|v| format!(", v{v}")) + .unwrap_or_default(); + format!("installed (integrity: {integrity}{ver})") + } + HookStatus::NotInstalled => "not installed".to_string(), + HookStatus::NotSupported { note } => format!("not supported ({note})"), + }; + println!( + " {:width$}hooks: {}", + "", + hook_str, + width = agent.kind.display_name().len() + 3, + ); + + // Rules + if let Some(ref rules) = agent.rules { + let status = if rules.exists { "found" } else { "not found" }; + println!( + " {:width$}rules: {} ({})", + "", + rules.path, + status, + width = agent.kind.display_name().len() + 3, + ); + } + } +} + +pub(super) fn print_json(agents: &[AgentStatus]) -> anyhow::Result<()> { + let agent_values: Vec = agents + .iter() + .map(|agent| { + let sessions = agent.sessions.as_ref().map(|s| { + serde_json::json!({ + "path": s.path, + "detail": s.detail, + }) + }); + + let hooks = match &agent.hooks { + HookStatus::Installed { version, integrity } => serde_json::json!({ + "status": "installed", + "version": version, + "integrity": integrity, + }), + HookStatus::NotInstalled => serde_json::json!({ + "status": "not_installed", + }), + HookStatus::NotSupported { note } => serde_json::json!({ + "status": "not_supported", + "note": note, + }), + }; + + let rules = agent.rules.as_ref().map(|r| { + 
serde_json::json!({ + "path": r.path, + "exists": r.exists, + }) + }); + + serde_json::json!({ + "name": agent.kind.display_name(), + "cli_name": agent.kind.cli_name(), + "detected": agent.detected, + "sessions": sessions, + "hooks": hooks, + "rules": rules, + }) + }) + .collect(); + + let output = serde_json::json!({ "agents": agent_values }); + println!("{}", serde_json::to_string_pretty(&output)?); + Ok(()) +} + +pub(super) fn print_help() { + println!("skim agents"); + println!(); + println!(" Display detected AI agents and their integration status"); + println!(); + println!("Usage: skim agents [OPTIONS]"); + println!(); + println!("Options:"); + println!(" --json Output as JSON"); + println!(" --help Print this help message"); +} diff --git a/crates/rskim/src/cmd/agents/mod.rs b/crates/rskim/src/cmd/agents/mod.rs new file mode 100644 index 0000000..c77352c --- /dev/null +++ b/crates/rskim/src/cmd/agents/mod.rs @@ -0,0 +1,140 @@ +//! `skim agents` -- display detected AI agents and their hook/session status. +//! +//! Scans for known AI coding agents (Claude Code, Cursor, Codex CLI, Gemini CLI, +//! Copilot CLI) and reports their detection status, session paths, hook installation +//! status, and rules directory presence. + +mod detection; +mod formatting; +mod types; +mod util; + +use std::process::ExitCode; + +use detection::detect_all_agents; +use formatting::{print_help, print_json, print_text}; + +/// Run the `skim agents` subcommand. +pub(crate) fn run(args: &[String]) -> anyhow::Result { + if args.iter().any(|a| matches!(a.as_str(), "--help" | "-h")) { + print_help(); + return Ok(ExitCode::SUCCESS); + } + + let json_output = args.iter().any(|a| a == "--json"); + + let agents = detect_all_agents(); + + if json_output { + print_json(&agents)?; + } else { + print_text(&agents); + } + + Ok(ExitCode::SUCCESS) +} + +/// Build the clap `Command` definition for shell completions. 
+pub(super) fn command() -> clap::Command { + clap::Command::new("agents") + .about("Display detected AI agents and their integration status") + .arg( + clap::Arg::new("json") + .long("json") + .action(clap::ArgAction::SetTrue) + .help("Output as JSON"), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cmd::session::AgentKind; + use types::HookStatus; + + #[test] + fn test_agents_run_no_crash() { + let result = run(&[]); + assert!(result.is_ok()); + } + + #[test] + fn test_agents_help_flag() { + let result = run(&["--help".to_string()]); + assert!(result.is_ok()); + } + + #[test] + fn test_agents_json_output_valid_json() { + let agents = detect_all_agents(); + assert_eq!( + agents.len(), + AgentKind::all_supported().len(), + "agent count should match supported kinds" + ); + + let result = run(&["--json".to_string()]); + assert!(result.is_ok()); + + for agent in &agents { + match &agent.hooks { + HookStatus::Installed { integrity, .. } => { + assert!( + ["ok", "tampered", "missing", "unknown"].contains(integrity), + "unexpected integrity value: {integrity}" + ); + } + HookStatus::NotInstalled => {} + HookStatus::NotSupported { note } => { + assert!(!note.is_empty(), "NotSupported note should not be empty"); + } + } + } + } + + #[test] + fn test_hook_status_display() { + let installed = HookStatus::Installed { + version: Some("2.0.0".to_string()), + integrity: "ok", + }; + match &installed { + HookStatus::Installed { version, integrity } => { + assert_eq!(version.as_deref(), Some("2.0.0")); + assert_eq!(*integrity, "ok"); + } + _ => panic!("expected Installed"), + } + + let not_supported = HookStatus::NotSupported { + note: "experimental", + }; + match ¬_supported { + HookStatus::NotSupported { note } => { + assert_eq!(*note, "experimental"); + } + _ => panic!("expected NotSupported"), + } + } + + #[test] + fn test_agent_kind_cli_name() { + assert_eq!(AgentKind::ClaudeCode.cli_name(), "claude-code"); + assert_eq!(AgentKind::Cursor.cli_name(), 
"cursor"); + assert_eq!(AgentKind::CodexCli.cli_name(), "codex"); + assert_eq!(AgentKind::GeminiCli.cli_name(), "gemini"); + assert_eq!(AgentKind::CopilotCli.cli_name(), "copilot"); + assert_eq!(AgentKind::OpenCode.cli_name(), "opencode"); + } + + #[test] + fn test_agent_kind_all_supported() { + let all = AgentKind::all_supported(); + assert!(all.len() >= 5, "expected at least 5 agents"); + assert!(all.contains(&AgentKind::ClaudeCode)); + assert!(all.contains(&AgentKind::Cursor)); + assert!(all.contains(&AgentKind::CodexCli)); + assert!(all.contains(&AgentKind::GeminiCli)); + assert!(all.contains(&AgentKind::CopilotCli)); + } +} diff --git a/crates/rskim/src/cmd/agents/types.rs b/crates/rskim/src/cmd/agents/types.rs new file mode 100644 index 0000000..cbe1280 --- /dev/null +++ b/crates/rskim/src/cmd/agents/types.rs @@ -0,0 +1,37 @@ +//! Agent detection types used by the `skim agents` subcommand. + +use crate::cmd::session::AgentKind; + +/// Detected agent status report. +pub(super) struct AgentStatus { + pub(super) kind: AgentKind, + pub(super) detected: bool, + pub(super) sessions: Option, + pub(super) hooks: HookStatus, + pub(super) rules: Option, +} + +/// Session file information. +pub(super) struct SessionInfo { + pub(super) path: String, + pub(super) detail: String, // e.g., "42 files" or "1.2 GB" +} + +/// Hook installation status. +#[derive(Debug)] +pub(super) enum HookStatus { + Installed { + version: Option, + integrity: &'static str, + }, + NotInstalled, + NotSupported { + note: &'static str, + }, +} + +/// Rules directory information. +pub(super) struct RulesInfo { + pub(super) path: String, + pub(super) exists: bool, +} diff --git a/crates/rskim/src/cmd/agents/util.rs b/crates/rskim/src/cmd/agents/util.rs new file mode 100644 index 0000000..dd8fa0f --- /dev/null +++ b/crates/rskim/src/cmd/agents/util.rs @@ -0,0 +1,154 @@ +//! Utility helpers for the `skim agents` subcommand. 
+ +use std::path::Path; + +/// Replace home directory prefix with ~ for display. +pub(super) fn tilde_path(path: &Path) -> String { + if let Some(home) = dirs::home_dir() { + if let Ok(stripped) = path.strip_prefix(&home) { + return format!("~/{}", stripped.display()); + } + } + path.display().to_string() +} + +/// Maximum directory traversal depth for recursive helpers. +pub(super) const MAX_TRAVERSAL_DEPTH: usize = 10; + +/// Count files with a specific extension recursively in a directory. +pub(super) fn count_files_recursive(dir: &Path, extension: &str) -> usize { + count_files_recursive_inner(dir, extension, 0) +} + +fn count_files_recursive_inner(dir: &Path, extension: &str, depth: usize) -> usize { + if depth >= MAX_TRAVERSAL_DEPTH { + return 0; + } + let mut count = 0; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let ft = match entry.file_type() { + Ok(ft) => ft, + Err(_) => continue, + }; + if ft.is_dir() { + count += count_files_recursive_inner(&entry.path(), extension, depth + 1); + } else if ft.is_file() + && entry.path().extension().and_then(|e| e.to_str()) == Some(extension) + { + count += 1; + } + } + } + count +} + +/// Count files (non-directories) directly in a directory. +pub(super) fn count_files_in_dir(dir: &Path) -> usize { + std::fs::read_dir(dir) + .ok() + .map(|entries| { + entries + .flatten() + .filter(|e| e.file_type().is_ok_and(|ft| ft.is_file())) + .count() + }) + .unwrap_or(0) +} + +/// Get human-readable size of a directory. +pub(super) fn dir_size_human(dir: &Path) -> String { + let bytes = dir_size_bytes(dir); + if bytes >= 1_073_741_824 { + format!("{:.1} GB", bytes as f64 / 1_073_741_824.0) + } else if bytes >= 1_048_576 { + format!("{:.1} MB", bytes as f64 / 1_048_576.0) + } else if bytes >= 1024 { + format!("{:.1} KB", bytes as f64 / 1024.0) + } else { + format!("{bytes} bytes") + } +} + +/// Calculate total size of all files in a directory tree. 
+fn dir_size_bytes(dir: &Path) -> u64 { + dir_size_bytes_inner(dir, 0) +} + +fn dir_size_bytes_inner(dir: &Path, depth: usize) -> u64 { + if depth >= MAX_TRAVERSAL_DEPTH { + return 0; + } + let mut total: u64 = 0; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let ft = match entry.file_type() { + Ok(ft) => ft, + Err(_) => continue, + }; + if ft.is_dir() { + total += dir_size_bytes_inner(&entry.path(), depth + 1); + } else if let Ok(meta) = entry.metadata() { + total += meta.len(); + } + } + } + total +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_tilde_path_with_home() { + if let Some(home) = dirs::home_dir() { + let path = home.join("some").join("path"); + let result = tilde_path(&path); + assert!( + result.starts_with("~/"), + "expected ~/ prefix, got: {result}" + ); + assert!( + result.contains("some/path"), + "expected path suffix, got: {result}" + ); + } + } + + #[test] + fn test_tilde_path_without_home_prefix() { + let path = PathBuf::from("/tmp/not-home/file"); + let result = tilde_path(&path); + assert_eq!(result, "/tmp/not-home/file"); + } + + #[test] + fn test_count_files_recursive_empty_dir() { + let dir = tempfile::TempDir::new().unwrap(); + assert_eq!(count_files_recursive(dir.path(), "jsonl"), 0); + } + + #[test] + fn test_count_files_recursive_with_files() { + let dir = tempfile::TempDir::new().unwrap(); + std::fs::write(dir.path().join("a.jsonl"), "{}").unwrap(); + std::fs::write(dir.path().join("b.jsonl"), "{}").unwrap(); + std::fs::write(dir.path().join("c.txt"), "hello").unwrap(); + let sub = dir.path().join("subdir"); + std::fs::create_dir(&sub).unwrap(); + std::fs::write(sub.join("d.jsonl"), "{}").unwrap(); + assert_eq!(count_files_recursive(dir.path(), "jsonl"), 3); + } + + #[test] + fn test_dir_size_human_formats() { + let dir = tempfile::TempDir::new().unwrap(); + let size = dir_size_human(dir.path()); + assert!( + size.contains("bytes") || 
size.contains("KB"), + "unexpected size format: {size}" + ); + } +} diff --git a/crates/rskim/src/cmd/completions.rs b/crates/rskim/src/cmd/completions.rs index 3f8dee8..ffd5784 100644 --- a/crates/rskim/src/cmd/completions.rs +++ b/crates/rskim/src/cmd/completions.rs @@ -67,21 +67,22 @@ fn build_full_command() -> Command { ); cmd = cmd.subcommand(completions_sub); - // Add the rewrite subcommand (definition lives in rewrite.rs to avoid duplication) + // Add subcommands with full arg definitions for accurate completions + cmd = cmd.subcommand(super::agents::command()); cmd = cmd.subcommand(super::rewrite::command()); - - // Add the init subcommand (definition lives in init.rs to avoid duplication) cmd = cmd.subcommand(super::init::command()); - - // Add the discover subcommand (definition lives in discover.rs to avoid duplication) cmd = cmd.subcommand(super::discover::command()); - - // Add the learn subcommand (definition lives in learn.rs to avoid duplication) cmd = cmd.subcommand(super::learn::command()); - // Subcommands with full arg definitions added above — skip in the stub loop. - const IMPLEMENTED_SUBCOMMANDS: &[&str] = - &["completions", "discover", "init", "learn", "rewrite"]; + // Subcommands with full arg definitions added above -- skip in the stub loop. 
+ const IMPLEMENTED_SUBCOMMANDS: &[&str] = &[ + "agents", + "completions", + "discover", + "init", + "learn", + "rewrite", + ]; // Add stub subcommands for all OTHER known subcommands for name in super::KNOWN_SUBCOMMANDS { diff --git a/crates/rskim/src/cmd/discover.rs b/crates/rskim/src/cmd/discover.rs index ef6d408..f4fd156 100644 --- a/crates/rskim/src/cmd/discover.rs +++ b/crates/rskim/src/cmd/discover.rs @@ -49,7 +49,9 @@ pub(crate) fn run(args: &[String]) -> anyhow::Result { Ok(ExitCode::SUCCESS) } -// ---- Config ---- +// ============================================================================ +// Config +// ============================================================================ #[derive(Debug)] struct DiscoverConfig { @@ -93,9 +95,7 @@ fn parse_args(args: &[String]) -> anyhow::Result { if i >= args.len() { anyhow::bail!("--agent requires a value (e.g., claude-code)"); } - config.agent_filter = Some(AgentKind::from_str(&args[i]).ok_or_else(|| { - anyhow::anyhow!("unknown agent: '{}'\nSupported: claude-code", &args[i]) - })?); + config.agent_filter = Some(AgentKind::parse_cli_arg(&args[i])?); } "--json" => { config.json_output = true; @@ -112,7 +112,9 @@ fn parse_args(args: &[String]) -> anyhow::Result { Ok(config) } -// ---- Analysis ---- +// ============================================================================ +// Analysis +// ============================================================================ struct DiscoverAnalysis { total_invocations: usize, @@ -170,9 +172,14 @@ fn analyze_invocations(invocations: &[ToolInvocation]) -> DiscoverAnalysis { }); } ToolInput::Bash { command } => { + // Skip commands already rewritten by the hook (start with "skim ") + if command.starts_with("skim ") { + continue; + } + // Check if this command has a skim rewrite let tokens: Vec<&str> = command.split_whitespace().collect(); - let has_rewrite = !tokens.is_empty() && check_has_rewrite(&tokens); + let has_rewrite = check_has_rewrite(&tokens); let 
rewrite_target = if has_rewrite { get_rewrite_target(&tokens) } else { @@ -270,7 +277,9 @@ fn get_rewrite_target(tokens: &[&str]) -> Option { } } -// ---- Output ---- +// ============================================================================ +// Output +// ============================================================================ fn print_text_report(analysis: &DiscoverAnalysis) { println!("skim discover -- optimization opportunities\n"); @@ -406,7 +415,9 @@ fn print_json_report(analysis: &DiscoverAnalysis) -> anyhow::Result<()> { Ok(()) } -// ---- Help ---- +// ============================================================================ +// Help +// ============================================================================ fn print_help() { println!("skim discover"); @@ -417,6 +428,8 @@ fn print_help() { println!(); println!("Options:"); println!(" --since Time window (e.g., 24h, 7d, 1w) [default: 24h]"); + println!(" (24h default suits recent-session exploration;"); + println!(" use --since 7d for broader analysis)"); println!(" --session latest Only scan the most recent session"); println!(" --agent Only scan sessions from a specific agent"); println!(" --json Output machine-readable JSON"); @@ -433,7 +446,9 @@ fn print_help() { println!(" skim discover --json Machine-readable output"); } -// ---- Clap command for completions ---- +// ============================================================================ +// Clap command for completions +// ============================================================================ pub(super) fn command() -> clap::Command { clap::Command::new("discover") @@ -442,7 +457,7 @@ pub(super) fn command() -> clap::Command { clap::Arg::new("since") .long("since") .value_name("DURATION") - .help("Time window (e.g., 24h, 7d, 1w)"), + .help("Time window (e.g., 24h, 7d, 1w) [default: 24h]"), ) .arg( clap::Arg::new("session") @@ -586,4 +601,49 @@ mod tests { let config = parse_args(&["--since".to_string(), 
"7d".to_string()]).unwrap(); assert!(config.since.is_some()); } + + // ---- analyze_invocations: skim command exclusion ---- + + fn make_bash_invocation(command: &str) -> ToolInvocation { + ToolInvocation { + tool_name: "Bash".to_string(), + input: ToolInput::Bash { + command: command.to_string(), + }, + timestamp: "2024-01-01T00:00:00Z".to_string(), + session_id: "sess1".to_string(), + agent: AgentKind::ClaudeCode, + result: Some(session::ToolResult { + content: "output".to_string(), + is_error: false, + }), + } + } + + #[test] + fn test_analyze_excludes_already_rewritten_commands() { + // Commands starting with "skim " should NOT be counted as rewritable + let inv1 = make_bash_invocation("skim test cargo --nocapture"); + let inv2 = make_bash_invocation("skim build clippy"); + let inv3 = make_bash_invocation("cargo test"); // this one IS rewritable + let invocations = vec![inv1, inv2, inv3]; + + let analysis = analyze_invocations(&invocations); + + // Only "cargo test" should be in bash_commands, not the skim commands + assert_eq!(analysis.bash_commands.len(), 1); + assert_eq!(analysis.bash_commands[0].command, "cargo test"); + assert!(analysis.bash_commands[0].has_rewrite); + } + + #[test] + fn test_analyze_counts_non_skim_commands() { + let inv1 = make_bash_invocation("ls -la"); + let inv2 = make_bash_invocation("cargo test"); + let invocations = vec![inv1, inv2]; + + let analysis = analyze_invocations(&invocations); + + assert_eq!(analysis.bash_commands.len(), 2); + } } diff --git a/crates/rskim/src/cmd/hook_log.rs b/crates/rskim/src/cmd/hook_log.rs new file mode 100644 index 0000000..40bcd14 --- /dev/null +++ b/crates/rskim/src/cmd/hook_log.rs @@ -0,0 +1,281 @@ +//! Log file for hook-mode diagnostics (#57). +//! +//! CRITICAL DESIGN CONSTRAINT: Hook-mode warnings MUST go to a log file, +//! NEVER to stderr. Claude Code treats stderr+exit(0) as an error +//! (GRANITE #361 Bug 3). This module provides a file-based logging path +//! 
that is safe for hook execution context. +//! +//! Log location: `~/.cache/skim/hook.log` +//! Rotation: 1 MB max, 3 archived copies (`.1`, `.2`, `.3`) + +use std::io::Write; +use std::path::Path; + +/// Maximum log file size before rotation (1 MB). +const MAX_LOG_SIZE: u64 = 1024 * 1024; + +/// Maximum number of archive files to keep. +const MAX_ARCHIVES: u32 = 3; + +/// Log a warning to `~/.cache/skim/hook.log` with rotation. +/// +/// NEVER outputs to stderr -- safe for use in hook execution context. +/// All failures are silently ignored to never break the hook. +pub(crate) fn log_hook_warning(message: &str) { + let log_path = match cache_dir() { + Some(dir) => dir.join("hook.log"), + None => return, + }; + + // Ensure cache directory exists + let _ = std::fs::create_dir_all(log_path.parent().unwrap_or(Path::new("."))); + + // Rotate if needed before appending + rotate_if_needed(&log_path); + + // Append the warning with timestamp + if let Ok(mut file) = std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(&log_path) + { + let timestamp = timestamp_string(); + let _ = writeln!(file, "[{timestamp}] {message}"); + } +} + +/// Rotate log file if it exceeds [`MAX_LOG_SIZE`]. +/// +/// Shift scheme: delete `.3`, rename `.2` -> `.3`, `.1` -> `.2`, current -> `.1`. +fn rotate_if_needed(log_path: &Path) { + let size = std::fs::metadata(log_path).map(|m| m.len()).unwrap_or(0); + if size < MAX_LOG_SIZE { + return; + } + + // Shift archives: .3 is deleted, .2 -> .3, .1 -> .2 + for i in (1..MAX_ARCHIVES).rev() { + let from = archive_path(log_path, i); + let to = archive_path(log_path, i + 1); + let _ = std::fs::rename(&from, &to); + } + + // Current -> .1 + let archive_1 = archive_path(log_path, 1); + let _ = std::fs::rename(log_path, &archive_1); +} + +/// Build the path for an archive file (e.g., `hook.log.1`, `hook.log.2`). 
+fn archive_path(log_path: &Path, index: u32) -> std::path::PathBuf { + let mut path = log_path.as_os_str().to_owned(); + path.push(format!(".{index}")); + std::path::PathBuf::from(path) +} + +/// Get the skim cache directory, respecting `$SKIM_CACHE_DIR` override and +/// platform conventions. +/// +/// Priority: `SKIM_CACHE_DIR` env > `dirs::cache_dir()/skim`. +/// The env override enables test isolation on all platforms (especially macOS +/// where `dirs::cache_dir()` ignores `$XDG_CACHE_HOME`). +pub(super) fn cache_dir() -> Option { + if let Ok(dir) = std::env::var("SKIM_CACHE_DIR") { + return Some(std::path::PathBuf::from(dir)); + } + dirs::cache_dir().map(|c| c.join("skim")) +} + +/// Generate a timestamp string in ISO-8601 format (UTC approximation). +/// +/// Uses `days_to_date` (Howard Hinnant calendar algorithm) to avoid +/// pulling in chrono. Includes hour:minute:second for log granularity. +fn timestamp_string() -> String { + let now = std::time::SystemTime::now(); + let secs = now + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let days = secs / 86400; + let day_secs = secs % 86400; + let (year, month, day) = days_to_date(days); + let hour = day_secs / 3600; + let minute = (day_secs % 3600) / 60; + let second = day_secs % 60; + format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z") +} + +/// Convert days since Unix epoch to (year, month, day). 
/// Convert days since Unix epoch to (year, month, day).
/// Algorithm from http://howardhinnant.github.io/date_algorithms.html
pub(super) fn days_to_date(days_since_epoch: u64) -> (u64, u64, u64) {
    // Shift the epoch from 1970-01-01 to 0000-03-01 so leap days fall at
    // the end of each March-based year (required by the era math below).
    let z = days_since_epoch + 719468;
    // One era = 400 Gregorian years = 146097 days.
    let era = z / 146097;
    let doe = z - era * 146097; // day-of-era, in [0, 146096]
    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; // year-of-era, in [0, 399]
    let y = yoe + era * 400;
    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); // day-of-year (March-based), in [0, 365]
    let mp = (5 * doy + 2) / 153; // March-based month index, in [0, 11]
    let d = doy - (153 * mp + 2) / 5 + 1; // day of month, in [1, 31]
    let m = if mp < 10 { mp + 3 } else { mp - 9 }; // civil month, in [1, 12]
    // Jan/Feb belong to the *next* civil year in the March-based calendar.
    let y = if m <= 2 { y + 1 } else { y };
    (y, m, d)
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_log_rotation_at_1mb() {
        let dir = tempfile::TempDir::new().unwrap();
        let log_path = dir.path().join("hook.log");

        // Create a log file just over 1 MB
        let content = "x".repeat(MAX_LOG_SIZE as usize + 100);
        std::fs::write(&log_path, &content).unwrap();

        // Trigger rotation
        rotate_if_needed(&log_path);

        // Original should be gone, archive .1 should exist
        assert!(
            !log_path.exists(),
            "Original log should be renamed during rotation"
        );
        let archive1 = archive_path(&log_path, 1);
        assert!(archive1.exists(), "Archive .1 should exist after rotation");

        // Verify archive content matches original
        let archived_content = std::fs::read_to_string(&archive1).unwrap();
        assert_eq!(archived_content, content);
    }

    #[test]
    fn test_rotation_shifts_existing_archives() {
        let dir = tempfile::TempDir::new().unwrap();
        let log_path = dir.path().join("hook.log");

        // Create existing archives
        std::fs::write(archive_path(&log_path, 1), "archive 1 content").unwrap();
        std::fs::write(archive_path(&log_path, 2), "archive 2 content").unwrap();

        // Create an oversized current log
        let big_content = "y".repeat(MAX_LOG_SIZE as usize + 1);
        std::fs::write(&log_path, &big_content).unwrap();

        rotate_if_needed(&log_path);

        // .1 should now contain old current log
        let a1 = std::fs::read_to_string(archive_path(&log_path, 1)).unwrap();
        assert_eq!(a1, big_content);

        // .2 should contain old .1
        let a2 = std::fs::read_to_string(archive_path(&log_path, 2)).unwrap();
        assert_eq!(a2, "archive 1 content");

        // .3 should contain old .2
        let a3 = std::fs::read_to_string(archive_path(&log_path, 3)).unwrap();
        assert_eq!(a3, "archive 2 content");
    }

    #[test]
    fn test_rotation_not_triggered_under_limit() {
        let dir = tempfile::TempDir::new().unwrap();
        let log_path = dir.path().join("hook.log");

        // Create a small log file
        std::fs::write(&log_path, "small log entry\n").unwrap();

        rotate_if_needed(&log_path);

        // File should still exist unchanged
        assert!(log_path.exists(), "Small log should not be rotated");
        assert!(
            !archive_path(&log_path, 1).exists(),
            "No archive should be created"
        );
    }

    #[test]
    fn test_rotation_missing_file_is_noop() {
        let dir = tempfile::TempDir::new().unwrap();
        let log_path = dir.path().join("nonexistent.log");

        // Should not panic or error (metadata() fails -> size treated as 0)
        rotate_if_needed(&log_path);

        assert!(!log_path.exists());
    }

    #[test]
    fn test_timestamp_string_format() {
        let ts = timestamp_string();
        // Should match ISO-8601 pattern: YYYY-MM-DDTHH:MM:SSZ
        assert_eq!(ts.len(), 20, "Timestamp should be 20 chars: {ts}");
        assert!(ts.ends_with('Z'), "Timestamp should end with Z: {ts}");
        assert_eq!(&ts[4..5], "-", "Dash after year: {ts}");
        assert_eq!(&ts[7..8], "-", "Dash after month: {ts}");
        assert_eq!(&ts[10..11], "T", "T separator: {ts}");
        assert_eq!(&ts[13..14], ":", "Colon after hour: {ts}");
        assert_eq!(&ts[16..17], ":", "Colon after minute: {ts}");
    }

    #[test]
    fn test_archive_path_format() {
        let log = std::path::PathBuf::from("/tmp/hook.log");
        assert_eq!(
            archive_path(&log, 1),
            std::path::PathBuf::from("/tmp/hook.log.1")
        );
        assert_eq!(
            archive_path(&log, 3),
            std::path::PathBuf::from("/tmp/hook.log.3")
        );
    }

    #[test]
    fn test_log_hook_warning_triggers_rotation() {
        // End-to-end: call log_hook_warning with a >1MB log file already in place.
        // Verifies that log_hook_warning rotates the existing file to .1 and
        // creates a fresh hook.log with the new message.
        let dir = tempfile::TempDir::new().unwrap();
        let cache = dir.path().join("skim-cache");
        std::fs::create_dir_all(&cache).unwrap();

        // Pre-fill hook.log just over the rotation threshold
        let log_path = cache.join("hook.log");
        let big_content = "z".repeat(MAX_LOG_SIZE as usize + 100);
        std::fs::write(&log_path, &big_content).unwrap();

        // Override SKIM_CACHE_DIR so log_hook_warning writes to our temp dir.
        // NOTE(review): set_var/remove_var mutate process-global state; under
        // the default parallel test runner, another test reading
        // SKIM_CACHE_DIR concurrently could race, and any pre-existing value
        // is dropped rather than restored — consider a serial guard. TODO confirm.
        std::env::set_var("SKIM_CACHE_DIR", &cache);
        log_hook_warning("rotation integration test");
        std::env::remove_var("SKIM_CACHE_DIR");

        // The old oversized log should be archived to .1
        let archive1 = archive_path(&log_path, 1);
        assert!(
            archive1.exists(),
            "Archive .1 should exist after rotation triggered by log_hook_warning"
        );
        let archived = std::fs::read_to_string(&archive1).unwrap();
        assert_eq!(
            archived, big_content,
            "Archive .1 should contain the original oversized content"
        );

        // The new hook.log should contain the freshly written message
        assert!(
            log_path.exists(),
            "hook.log should be recreated after rotation"
        );
        let new_content = std::fs::read_to_string(&log_path).unwrap();
        assert!(
            new_content.contains("rotation integration test"),
            "New hook.log should contain the warning message, got: {new_content}"
        );
    }
}
//! Claude Code hook protocol implementation.
//!
//! Claude Code uses PreToolUse hooks. The hook reads JSON from stdin,
//! extracts tool_input.command, rewrites if matched, and emits
//! hookSpecificOutput with updatedInput. Never sets permissionDecision.

use super::{HookInput, HookProtocol, HookSupport};
use crate::cmd::session::AgentKind;

/// Claude Code hook implementation (PreToolUse hooks).
pub(crate) struct ClaudeCodeHook;

impl HookProtocol for ClaudeCodeHook {
    /// Identifies this implementation as the Claude Code agent.
    fn agent_kind(&self) -> AgentKind {
        AgentKind::ClaudeCode
    }

    /// Claude Code has a true PreToolUse hook, so rewrites apply
    /// automatically (not awareness-only).
    fn hook_support(&self) -> HookSupport {
        HookSupport::RealHook
    }

    /// Extract the shell command from `tool_input.command` in the hook JSON
    /// (shared helper); `None` when the field is absent or not a string.
    fn parse_input(&self, json: &serde_json::Value) -> Option<HookInput> {
        super::parse_tool_input_command(json)
    }

    /// Build the PreToolUse response carrying the rewritten command.
    ///
    /// Deliberately emits no `permissionDecision` field: this response only
    /// updates the tool input, it never grants or denies permission.
    fn format_response(&self, rewritten_command: &str) -> serde_json::Value {
        serde_json::json!({
            "hookSpecificOutput": {
                "hookEventName": "PreToolUse",
                "updatedInput": {
                    "command": rewritten_command
                }
            }
        })
    }

    /// Generate the installed wrapper script via the shared template,
    /// tagged for the `claude-code` agent.
    fn generate_script(&self, binary_path: &str, version: &str) -> String {
        super::generate_hook_script(binary_path, version, "claude-code")
    }
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cmd::hooks::{InstallOpts, UninstallOpts};

    // Unit struct: constructing it is free, so each test builds a fresh one.
    fn hook() -> ClaudeCodeHook {
        ClaudeCodeHook
    }

    #[test]
    fn test_claude_agent_kind() {
        assert_eq!(hook().agent_kind(), AgentKind::ClaudeCode);
    }

    #[test]
    fn test_claude_hook_support() {
        assert_eq!(hook().hook_support(), HookSupport::RealHook);
    }

    #[test]
    fn test_claude_parse_input_valid() {
        let json = serde_json::json!({
            "tool_input": {
                "command": "cargo test --nocapture"
            }
        });
        let result = hook().parse_input(&json);
        assert!(result.is_some());
        assert_eq!(result.unwrap().command, "cargo test --nocapture");
    }

    #[test]
    fn test_claude_parse_input_missing_tool_input() {
        let json = serde_json::json!({});
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_claude_parse_input_missing_command() {
        let json = serde_json::json!({
            "tool_input": {
                "file_path": "/tmp/test.rs"
            }
        });
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_claude_format_response() {
        let response = hook().format_response("skim test cargo");
        let output = response.get("hookSpecificOutput").unwrap();
        assert_eq!(output["hookEventName"], "PreToolUse");
        assert_eq!(output["updatedInput"]["command"], "skim test cargo");
    }

    #[test]
    fn test_claude_format_response_no_permission_decision() {
        let response = hook().format_response("skim test cargo");
        // SECURITY: Must never set permissionDecision
        assert!(response.get("permissionDecision").is_none());
    }

    #[test]
    fn test_claude_generate_script() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.0.0");
        assert!(script.contains("#!/usr/bin/env bash"));
        assert!(script.contains("# skim-hook v1.0.0"));
        assert!(script.contains("SKIM_HOOK_VERSION=\"1.0.0\""));
        assert!(script.contains("exec \"/usr/local/bin/skim\" rewrite --hook --agent claude-code"));
    }

    #[test]
    fn test_claude_generate_script_init_comment() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.0.0");
        assert!(script.contains("skim init --agent claude-code"));
    }

    #[test]
    fn test_claude_install_default() {
        // Exercises the trait's default install() implementation: no script
        // is written and no config is patched.
        let opts = InstallOpts {
            binary_path: "/usr/local/bin/skim".into(),
            version: "1.0.0".into(),
            config_dir: "/tmp/.claude".into(),
            project_scope: false,
            dry_run: false,
        };
        let result = hook().install(&opts).unwrap();
        assert!(result.script_path.is_none());
        assert!(!result.config_patched);
    }

    #[test]
    fn test_claude_uninstall_default() {
        // Default uninstall() is a no-op that must succeed.
        let opts = UninstallOpts {
            config_dir: "/tmp/.claude".into(),
            force: false,
        };
        assert!(hook().uninstall(&opts).is_ok());
    }
}
//! Codex CLI hook protocol implementation (awareness-only).
//!
//! Codex CLI has no PreToolUse hook equivalent. This implementation
//! returns awareness-only support with no-op methods for all hook operations.

use super::{HookProtocol, HookSupport};
use crate::cmd::session::AgentKind;

/// Codex CLI awareness-only hook (no PreToolUse equivalent).
pub(crate) struct CodexCliHook;

impl HookProtocol for CodexCliHook {
    /// Identifies this implementation as the Codex CLI agent.
    fn agent_kind(&self) -> AgentKind {
        AgentKind::CodexCli
    }

    /// Codex CLI cannot intercept tool calls; skim can only raise awareness.
    fn hook_support(&self) -> HookSupport {
        HookSupport::AwarenessOnly
    }

    /// No-op: never parses input, since no hook event exists to feed it.
    fn parse_input(&self, _json: &serde_json::Value) -> Option<super::HookInput> {
        None
    }

    /// No-op: returns JSON null, since no hook response is ever emitted.
    fn format_response(&self, _rewritten_command: &str) -> serde_json::Value {
        serde_json::Value::Null
    }

    /// No-op: no wrapper script is generated for an awareness-only agent.
    fn generate_script(&self, _binary_path: &str, _version: &str) -> String {
        String::new()
    }
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cmd::hooks::{InstallOpts, UninstallOpts};

    // Unit struct: constructing it is free, so each test builds a fresh one.
    fn hook() -> CodexCliHook {
        CodexCliHook
    }

    #[test]
    fn test_codex_hook_support_is_awareness() {
        assert_eq!(hook().hook_support(), HookSupport::AwarenessOnly);
    }

    #[test]
    fn test_codex_parse_input_returns_none() {
        // Even well-formed hook JSON must be ignored by the no-op parser.
        let json = serde_json::json!({
            "tool_input": {
                "command": "cargo test"
            }
        });
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_codex_format_response_returns_null() {
        let response = hook().format_response("skim test cargo");
        assert!(response.is_null());
    }

    #[test]
    fn test_codex_agent_kind() {
        assert_eq!(hook().agent_kind(), AgentKind::CodexCli);
    }

    #[test]
    fn test_codex_generate_script_empty() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.0.0");
        assert!(script.is_empty());
    }

    #[test]
    fn test_codex_install_noop() {
        let opts = InstallOpts {
            binary_path: "/usr/local/bin/skim".into(),
            version: "1.0.0".into(),
            config_dir: "/tmp/.codex".into(),
            project_scope: false,
            dry_run: false,
        };
        let result = hook().install(&opts).unwrap();
        assert!(result.script_path.is_none());
        assert!(!result.config_patched);
    }

    #[test]
    fn test_codex_uninstall_noop() {
        let opts = UninstallOpts {
            config_dir: "/tmp/.codex".into(),
            force: false,
        };
        assert!(hook().uninstall(&opts).is_ok());
    }
}

//! Copilot CLI hook protocol implementation.
//!
//! Copilot CLI uses preToolUse hooks. The hook reads JSON from stdin,
//! extracts tool_input.command, rewrites if matched, and emits a
//! deny-with-suggestion response.
//!
//! ARCHITECTURE NOTE: Copilot's `allow` + `updatedInput` is currently broken.
//! Only `deny` works reliably. We use deny-with-suggestion: the deny reason
//! contains the optimized command for the user to accept manually.
//!
//! UPGRADE PATH: When Copilot ships working `allow` + `updatedInput`,
//! change `format_response` only (one-file change).

use super::{HookInput, HookProtocol, HookSupport};
use crate::cmd::session::AgentKind;

/// Copilot CLI hook implementation (preToolUse hooks, deny-with-suggestion).
pub(crate) struct CopilotCliHook;

impl HookProtocol for CopilotCliHook {
    /// Identifies this implementation as the Copilot CLI agent.
    fn agent_kind(&self) -> AgentKind {
        AgentKind::CopilotCli
    }

    /// Copilot CLI has a real preToolUse hook (even though the response
    /// is limited to deny-with-suggestion for now).
    fn hook_support(&self) -> HookSupport {
        HookSupport::RealHook
    }

    /// Extract the shell command from `tool_input.command` (shared helper);
    /// `None` when the field is absent or not a string.
    fn parse_input(&self, json: &serde_json::Value) -> Option<HookInput> {
        super::parse_tool_input_command(json)
    }

    /// Build the deny-with-suggestion response: the deny reason carries the
    /// optimized command for the user to accept manually.
    fn format_response(&self, rewritten_command: &str) -> serde_json::Value {
        // Deny-with-suggestion: Copilot's `allow` + `updatedInput` is broken.
        // When `allow` ships, change this to:
        // { "permissionDecision": "allow", "updatedInput": { "command": rewritten_command } }
        serde_json::json!({
            "permissionDecision": "deny",
            "reason": format!("Use optimized command: {}", rewritten_command)
        })
    }

    /// Generate the installed wrapper script via the shared template,
    /// tagged for the `copilot` agent.
    fn generate_script(&self, binary_path: &str, version: &str) -> String {
        super::generate_hook_script(binary_path, version, "copilot")
    }
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cmd::hooks::{InstallOpts, UninstallOpts};

    // Unit struct: constructing it is free, so each test builds a fresh one.
    fn hook() -> CopilotCliHook {
        CopilotCliHook
    }

    #[test]
    fn test_copilot_hook_is_real() {
        assert_eq!(hook().hook_support(), HookSupport::RealHook);
    }

    #[test]
    fn test_copilot_parse_input() {
        let json = serde_json::json!({
            "tool_input": {
                "command": "cargo test --all"
            }
        });
        let result = hook().parse_input(&json);
        assert!(result.is_some());
        assert_eq!(result.unwrap().command, "cargo test --all");
    }

    #[test]
    fn test_copilot_parse_input_missing_tool_input() {
        let json = serde_json::json!({});
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_copilot_parse_input_missing_command() {
        let json = serde_json::json!({
            "tool_input": {
                "file_path": "/tmp/test.rs"
            }
        });
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_copilot_format_response_is_deny() {
        let response = hook().format_response("skim test cargo");
        assert_eq!(response["permissionDecision"], "deny");
    }

    #[test]
    fn test_copilot_format_response_includes_command_in_reason() {
        let response = hook().format_response("skim test cargo");
        let reason = response["reason"].as_str().unwrap();
        assert!(
            reason.contains("skim test cargo"),
            "reason should contain the rewritten command, got: {reason}"
        );
        assert!(
            reason.starts_with("Use optimized command:"),
            "reason should start with prefix, got: {reason}"
        );
    }

    #[test]
    fn test_copilot_format_response_no_allow() {
        let response = hook().format_response("skim test cargo");
        // Must be "deny", never "allow" (Copilot's allow is broken)
        assert_ne!(
            response["permissionDecision"].as_str().unwrap(),
            "allow",
            "permissionDecision must be 'deny' until Copilot fixes 'allow'"
        );
    }

    #[test]
    fn test_copilot_format_response_no_hook_specific_output() {
        let response = hook().format_response("skim test cargo");
        // Copilot uses deny-with-suggestion, not hookSpecificOutput
        assert!(
            response.get("hookSpecificOutput").is_none(),
            "copilot should not use hookSpecificOutput"
        );
    }

    #[test]
    fn test_copilot_generate_script() {
        let script = hook().generate_script("/usr/local/bin/skim", "2.0.0");
        assert!(script.contains("#!/usr/bin/env bash"));
        assert!(script.contains("# skim-hook v2.0.0"));
        assert!(script.contains("skim init --agent copilot"));
        assert!(script.contains("SKIM_HOOK_VERSION=\"2.0.0\""));
        assert!(script.contains("exec \"/usr/local/bin/skim\" rewrite --hook --agent copilot"));
    }

    #[test]
    fn test_copilot_agent_kind() {
        assert_eq!(hook().agent_kind(), AgentKind::CopilotCli);
    }

    #[test]
    fn test_copilot_install_default() {
        // Exercises the trait's default install(): nothing written or patched.
        let opts = InstallOpts {
            binary_path: "/usr/local/bin/skim".into(),
            version: "1.0.0".into(),
            config_dir: "/tmp/.copilot".into(),
            project_scope: false,
            dry_run: false,
        };
        let result = hook().install(&opts).unwrap();
        assert!(result.script_path.is_none());
        assert!(!result.config_patched);
    }

    #[test]
    fn test_copilot_uninstall_default() {
        // Default uninstall() is a no-op that must succeed.
        let opts = UninstallOpts {
            config_dir: "/tmp/.copilot".into(),
            force: false,
        };
        assert!(hook().uninstall(&opts).is_ok());
    }
}
//! Cursor hook protocol implementation.
//!
//! Cursor uses `beforeShellExecution` hooks via `.cursor/hooks.json`.
//! The hook reads JSON with command at top level (not nested under
//! tool_input like Claude Code), rewrites if matched, and responds
//! with `{ "permission": "allow", "updated_input": { "command": ... } }`.

use super::{HookInput, HookProtocol, HookSupport};
use crate::cmd::session::AgentKind;

/// Cursor hook implementation (`beforeShellExecution` via `.cursor/hooks.json`).
pub(crate) struct CursorHook;

impl HookProtocol for CursorHook {
    /// Identifies this implementation as the Cursor agent.
    fn agent_kind(&self) -> AgentKind {
        AgentKind::Cursor
    }

    /// Cursor has a true `beforeShellExecution` hook, so rewrites apply
    /// automatically.
    fn hook_support(&self) -> HookSupport {
        HookSupport::RealHook
    }

    /// Extract the shell command from the top-level `command` field;
    /// `None` when the field is absent or not a string.
    fn parse_input(&self, json: &serde_json::Value) -> Option<HookInput> {
        // Cursor puts command at top level, not nested under tool_input
        let command = json.get("command").and_then(|c| c.as_str())?.to_string();
        Some(HookInput { command })
    }

    /// Build the hook response carrying the rewritten command.
    fn format_response(&self, rewritten_command: &str) -> serde_json::Value {
        // SECURITY: "permission": "allow" is REQUIRED by Cursor's hook protocol.
        // This is NOT the same as Claude Code's permissionDecision -- Cursor's
        // protocol requires an explicit permission field in every hook response.
        serde_json::json!({
            "permission": "allow",
            "updated_input": {
                "command": rewritten_command
            }
        })
    }

    /// Generate the installed wrapper script via the shared template,
    /// tagged for the `cursor` agent.
    fn generate_script(&self, binary_path: &str, version: &str) -> String {
        super::generate_hook_script(binary_path, version, "cursor")
    }
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cmd::hooks::{InstallOpts, UninstallOpts};

    // Unit struct: constructing it is free, so each test builds a fresh one.
    fn hook() -> CursorHook {
        CursorHook
    }

    #[test]
    fn test_cursor_hook_is_real() {
        assert_eq!(hook().hook_support(), HookSupport::RealHook);
    }

    #[test]
    fn test_cursor_agent_kind() {
        assert_eq!(hook().agent_kind(), AgentKind::Cursor);
    }

    #[test]
    fn test_cursor_parse_input() {
        let json = serde_json::json!({
            "command": "cargo test --nocapture"
        });
        let result = hook().parse_input(&json);
        assert!(result.is_some());
        assert_eq!(result.unwrap().command, "cargo test --nocapture");
    }

    #[test]
    fn test_cursor_parse_input_missing_command() {
        let json = serde_json::json!({});
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_cursor_parse_input_non_string_command() {
        let json = serde_json::json!({
            "command": 42
        });
        assert!(hook().parse_input(&json).is_none());
    }

    #[test]
    fn test_cursor_format_response() {
        let response = hook().format_response("skim test cargo");
        assert_eq!(response["permission"], "allow");
        assert_eq!(response["updated_input"]["command"], "skim test cargo");
    }

    #[test]
    fn test_cursor_format_response_has_required_permission_field() {
        // SECURITY: Cursor's hook protocol REQUIRES "permission": "allow" in
        // every response. This is NOT Claude Code's permissionDecision -- it is
        // a distinct, required field in Cursor's schema.
        let response = hook().format_response("skim test cargo");
        assert_eq!(
            response.get("permission").and_then(|v| v.as_str()),
            Some("allow"),
            "Cursor protocol requires 'permission' field set to 'allow'"
        );
    }

    #[test]
    fn test_cursor_format_response_no_hook_specific_output() {
        // Cursor uses permission/updated_input, not hookSpecificOutput
        let response = hook().format_response("skim test cargo");
        assert!(response.get("hookSpecificOutput").is_none());
    }

    #[test]
    fn test_cursor_format_response_no_permission_decision() {
        // Cursor must not emit Claude Code's permissionDecision field
        let response = hook().format_response("skim test cargo");
        assert!(
            response.get("permissionDecision").is_none(),
            "Cursor response must not contain Claude Code's permissionDecision"
        );
    }

    #[test]
    fn test_cursor_generate_script_absolute_path() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.2.0");
        assert!(script.contains("#!/usr/bin/env bash"));
        assert!(script.contains("# skim-hook v1.2.0"));
        assert!(script.contains("SKIM_HOOK_VERSION=\"1.2.0\""));
        assert!(script.contains("exec \"/usr/local/bin/skim\" rewrite --hook --agent cursor"));
        // Must use absolute path (quoted)
        assert!(script.contains("\"/usr/local/bin/skim\""));
    }

    #[test]
    fn test_cursor_generate_script_zero_stderr() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.0.0");
        // No eprintln or echo to stderr in generated script
        assert!(!script.contains(">&2"));
        assert!(!script.contains("echo"));
        assert!(!script.contains("eprintln"));
    }

    #[test]
    fn test_cursor_generate_script_init_comment() {
        let script = hook().generate_script("/usr/local/bin/skim", "1.0.0");
        assert!(script.contains("skim init --agent cursor"));
    }

    #[test]
    fn test_cursor_install_default() {
        // Exercises the trait's default install(): nothing written or patched.
        let opts = InstallOpts {
            binary_path: "/usr/local/bin/skim".into(),
            version: "1.0.0".into(),
            config_dir: "/tmp/.cursor".into(),
            project_scope: false,
            dry_run: false,
        };
        let result = hook().install(&opts).unwrap();
        assert!(result.script_path.is_none());
        assert!(!result.config_patched);
    }

    #[test]
    fn test_cursor_uninstall_default() {
        // Default uninstall() is a no-op that must succeed.
        let opts = UninstallOpts {
            config_dir: "/tmp/.cursor".into(),
            force: false,
        };
        assert!(hook().uninstall(&opts).is_ok());
    }
}
+ serde_json::json!({ + "decision": "allow", + "tool_input": { + "command": rewritten_command + } + }) + } + + fn generate_script(&self, binary_path: &str, version: &str) -> String { + super::generate_hook_script(binary_path, version, "gemini") + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::cmd::hooks::{InstallOpts, UninstallOpts}; + + fn hook() -> GeminiCliHook { + GeminiCliHook + } + + #[test] + fn test_gemini_hook_is_real() { + assert_eq!(hook().hook_support(), HookSupport::RealHook); + assert_eq!(hook().agent_kind(), AgentKind::GeminiCli); + } + + #[test] + fn test_gemini_parse_input() { + let json = serde_json::json!({ + "tool_name": "shell", + "tool_input": { + "command": "cargo test" + } + }); + let input = hook().parse_input(&json).expect("should parse input"); + assert_eq!(input.command, "cargo test"); + } + + #[test] + fn test_gemini_format_response() { + let response = hook().format_response("skim test cargo"); + assert_eq!(response["decision"], "allow"); + assert_eq!(response["tool_input"]["command"], "skim test cargo"); + } + + #[test] + fn test_gemini_format_response_has_required_decision_field() { + // SECURITY: Gemini CLI's BeforeTool protocol REQUIRES "decision": "allow" + // in every response. This is NOT Claude Code's permissionDecision -- it is + // a distinct, required field in Gemini CLI's schema. 
+ let response = hook().format_response("skim test cargo"); + assert_eq!( + response.get("decision").and_then(|v| v.as_str()), + Some("allow"), + "Gemini CLI protocol requires 'decision' field set to 'allow'" + ); + } + + #[test] + fn test_gemini_format_response_no_permission_decision() { + // Gemini must not emit Claude Code's permissionDecision field + let response = hook().format_response("skim test cargo"); + assert!( + response.get("permissionDecision").is_none(), + "Gemini response must not contain Claude Code's permissionDecision" + ); + } + + #[test] + fn test_gemini_generate_script_has_absolute_path() { + let script = hook().generate_script("/usr/local/bin/skim", "1.2.3"); + assert!( + script.contains("\"/usr/local/bin/skim\""), + "script must use quoted absolute binary path, got: {script}" + ); + assert!( + script.contains("exec"), + "script must use exec to replace shell process, got: {script}" + ); + } + + #[test] + fn test_gemini_generate_script_has_version() { + let script = hook().generate_script("/usr/local/bin/skim", "0.9.0"); + assert!( + script.contains("SKIM_HOOK_VERSION=\"0.9.0\""), + "script must export SKIM_HOOK_VERSION, got: {script}" + ); + assert!( + script.contains("# skim-hook v0.9.0"), + "script must contain version comment, got: {script}" + ); + } + + #[test] + fn test_gemini_parse_input_missing_command() { + // Missing tool_input entirely + let json = serde_json::json!({"tool_name": "shell"}); + assert!(hook().parse_input(&json).is_none()); + + // tool_input present but no command + let json = serde_json::json!({ + "tool_name": "shell", + "tool_input": {} + }); + assert!(hook().parse_input(&json).is_none()); + + // command is not a string + let json = serde_json::json!({ + "tool_name": "shell", + "tool_input": { + "command": 42 + } + }); + assert!(hook().parse_input(&json).is_none()); + } + + #[test] + fn test_gemini_generate_script_has_agent_flag() { + let script = hook().generate_script("/usr/local/bin/skim", "1.0.0"); + assert!( + 
script.contains("--agent gemini"), + "script must pass --agent gemini flag, got: {script}" + ); + } + + #[test] + fn test_gemini_generate_script_has_shebang() { + let script = hook().generate_script("/usr/local/bin/skim", "1.0.0"); + assert!( + script.starts_with("#!/usr/bin/env bash"), + "script must start with bash shebang, got: {script}" + ); + } + + #[test] + fn test_gemini_install_default() { + let opts = InstallOpts { + binary_path: "/usr/local/bin/skim".into(), + version: "1.0.0".into(), + config_dir: "/tmp/.gemini".into(), + project_scope: false, + dry_run: false, + }; + let result = hook().install(&opts).unwrap(); + assert!(result.script_path.is_none()); + assert!(!result.config_patched); + } + + #[test] + fn test_gemini_uninstall_default() { + let opts = UninstallOpts { + config_dir: "/tmp/.gemini".into(), + force: false, + }; + assert!(hook().uninstall(&opts).is_ok()); + } +} diff --git a/crates/rskim/src/cmd/hooks/mod.rs b/crates/rskim/src/cmd/hooks/mod.rs new file mode 100644 index 0000000..151e396 --- /dev/null +++ b/crates/rskim/src/cmd/hooks/mod.rs @@ -0,0 +1,276 @@ +//! Hook protocol abstraction for multi-agent hook integration. +//! +//! Each agent that supports tool interception hooks implements `HookProtocol`. +//! Agents without hook support use awareness-only installation. + +pub(crate) mod claude; +pub(crate) mod codex; +pub(crate) mod copilot; +pub(crate) mod cursor; +pub(crate) mod gemini; +pub(crate) mod opencode; + +use super::session::AgentKind; + +/// Whether an agent supports real hooks or awareness-only. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum HookSupport { + /// Agent supports real tool interception hooks. + RealHook, + /// Agent has no hook mechanism; install awareness files only. + AwarenessOnly, +} + +/// Input extracted from agent's hook event JSON. +#[derive(Debug, Clone)] +pub(crate) struct HookInput { + pub(crate) command: String, +} + +/// Result of a hook installation. 
+#[derive(Debug)] +#[allow(dead_code)] // Used in per-agent install() tests +pub(crate) struct InstallResult { + pub(crate) script_path: Option, + pub(crate) config_patched: bool, +} + +/// Options passed to install/uninstall. +#[derive(Debug)] +#[allow(dead_code)] // Used in per-agent install() tests +pub(crate) struct InstallOpts { + pub(crate) binary_path: std::path::PathBuf, + pub(crate) version: String, + pub(crate) config_dir: std::path::PathBuf, + pub(crate) project_scope: bool, + pub(crate) dry_run: bool, +} + +/// Options for uninstall. +#[derive(Debug)] +#[allow(dead_code)] // Used in per-agent uninstall() tests +pub(crate) struct UninstallOpts { + pub(crate) config_dir: std::path::PathBuf, + pub(crate) force: bool, +} + +/// Trait for agent-specific hook protocols. +/// +/// Each agent's hook system is different. This trait normalizes: +/// - Hook event parsing (agent JSON -> HookInput) +/// - Response formatting (rewritten command -> agent JSON) +/// - Script generation (binary path -> shell script) +/// - Installation/uninstallation +pub(crate) trait HookProtocol { + #[allow(dead_code)] // Used in tests only + fn agent_kind(&self) -> AgentKind; + + fn hook_support(&self) -> HookSupport; + fn parse_input(&self, json: &serde_json::Value) -> Option; + fn format_response(&self, rewritten_command: &str) -> serde_json::Value; + + #[allow(dead_code)] // Used in tests only + fn generate_script(&self, binary_path: &str, version: &str) -> String; + + /// Default no-op install. Override for agents with real hook installation. + #[allow(dead_code)] // Used in tests only + fn install(&self, _opts: &InstallOpts) -> anyhow::Result { + Ok(InstallResult { + script_path: None, + config_patched: false, + }) + } + + /// Default no-op uninstall. Override for agents with real hook removal. 
+ #[allow(dead_code)] // Used in tests only + fn uninstall(&self, _opts: &UninstallOpts) -> anyhow::Result<()> { + Ok(()) + } +} + +/// Shared parser for agents whose hook JSON nests the command under `tool_input.command`. +/// +/// Used by Claude Code, Copilot CLI, and Gemini CLI. Cursor differs (top-level `command`). +/// Codex and OpenCode are awareness-only and return `None` from `parse_input` directly. +pub(crate) fn parse_tool_input_command(json: &serde_json::Value) -> Option { + let command = json + .get("tool_input") + .and_then(|ti| ti.get("command")) + .and_then(|c| c.as_str())? + .to_string(); + Some(HookInput { command }) +} + +/// Characters that can escape double-quote context or inject shell commands. +/// +/// Matches the set used by `validate_shell_safe_path` in the init installer. +const SHELL_UNSAFE_CHARS: &[char] = &['"', '`', '$', '\\', '\n', '\0']; + +/// Generate a standard hook script for an agent. +/// +/// Shared by all RealHook agents. The script sets `SKIM_HOOK_VERSION` and +/// `exec`s the skim binary with `rewrite --hook --agent `. +/// +/// # Panics +/// +/// Panics if `binary_path`, `version`, or `agent_cli_name` contain +/// shell-unsafe characters (`"`, `` ` ``, `$`, `\`, newline, null). +#[allow(dead_code)] // Called by per-agent generate_script() impls, which are test-only +pub(crate) fn generate_hook_script( + binary_path: &str, + version: &str, + agent_cli_name: &str, +) -> String { + assert!( + !binary_path.chars().any(|c| SHELL_UNSAFE_CHARS.contains(&c)), + "binary_path contains shell-unsafe character: {binary_path}" + ); + assert!( + version + .bytes() + .all(|b| b.is_ascii_alphanumeric() || b == b'.' 
|| b == b'-'), + "version contains unsafe characters for shell interpolation: {version}" + ); + assert!( + agent_cli_name + .bytes() + .all(|b| b.is_ascii_alphanumeric() || b == b'-'), + "agent_cli_name contains unsafe characters for shell interpolation: {agent_cli_name}" + ); + format!( + "#!/usr/bin/env bash\n\ + # skim-hook v{version}\n\ + # Generated by: skim init --agent {agent_cli_name} -- do not edit manually\n\ + export SKIM_HOOK_VERSION=\"{version}\"\n\ + exec \"{binary_path}\" rewrite --hook --agent {agent_cli_name}\n" + ) +} + +/// Factory: create the appropriate HookProtocol implementation for a given agent. +pub(crate) fn protocol_for_agent(kind: AgentKind) -> Box { + match kind { + AgentKind::ClaudeCode => Box::new(claude::ClaudeCodeHook), + AgentKind::Cursor => Box::new(cursor::CursorHook), + AgentKind::GeminiCli => Box::new(gemini::GeminiCliHook), + AgentKind::CopilotCli => Box::new(copilot::CopilotCliHook), + AgentKind::CodexCli => Box::new(codex::CodexCliHook), + AgentKind::OpenCode => Box::new(opencode::OpenCodeHook), + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hook_support_equality() { + assert_eq!(HookSupport::RealHook, HookSupport::RealHook); + assert_ne!(HookSupport::RealHook, HookSupport::AwarenessOnly); + } + + #[test] + fn test_hook_input_clone() { + let input = HookInput { + command: "cargo test".to_string(), + }; + let cloned = input.clone(); + assert_eq!(cloned.command, "cargo test"); + } + + #[test] + fn test_parse_tool_input_command_valid() { + let json = serde_json::json!({ + "tool_input": { + "command": "cargo test --nocapture" + } + }); + let result = parse_tool_input_command(&json); + assert!(result.is_some()); + assert_eq!(result.unwrap().command, "cargo test --nocapture"); + } + + #[test] + fn 
test_parse_tool_input_command_missing_tool_input() { + let json = serde_json::json!({}); + assert!(parse_tool_input_command(&json).is_none()); + } + + #[test] + fn test_parse_tool_input_command_missing_command() { + let json = serde_json::json!({ + "tool_input": { + "file_path": "/tmp/test.rs" + } + }); + assert!(parse_tool_input_command(&json).is_none()); + } + + #[test] + fn test_parse_tool_input_command_non_string() { + let json = serde_json::json!({ + "tool_input": { + "command": 42 + } + }); + assert!(parse_tool_input_command(&json).is_none()); + } + + #[test] + fn test_generate_hook_script_structure() { + let script = generate_hook_script("/usr/local/bin/skim", "1.2.3", "test-agent"); + assert!(script.starts_with("#!/usr/bin/env bash\n")); + assert!(script.contains("# skim-hook v1.2.3")); + assert!(script.contains("skim init --agent test-agent")); + assert!(script.contains("SKIM_HOOK_VERSION=\"1.2.3\"")); + assert!(script.contains("exec \"/usr/local/bin/skim\" rewrite --hook --agent test-agent")); + } + + // ---- Shell injection guard tests ---- + + #[test] + #[should_panic(expected = "binary_path contains shell-unsafe character")] + fn test_generate_hook_script_rejects_backtick_in_path() { + generate_hook_script("/usr/local/bin/`evil`", "1.0.0", "test-agent"); + } + + #[test] + #[should_panic(expected = "binary_path contains shell-unsafe character")] + fn test_generate_hook_script_rejects_dollar_in_path() { + generate_hook_script("/usr/local/bin/$HOME/skim", "1.0.0", "test-agent"); + } + + #[test] + #[should_panic(expected = "binary_path contains shell-unsafe character")] + fn test_generate_hook_script_rejects_quote_in_path() { + generate_hook_script("/usr/local/bin/sk\"im", "1.0.0", "test-agent"); + } + + #[test] + #[should_panic(expected = "binary_path contains shell-unsafe character")] + fn test_generate_hook_script_rejects_newline_in_path() { + generate_hook_script("/usr/local/bin/skim\n;rm -rf /", "1.0.0", "test-agent"); + } + + #[test] + 
#[should_panic(expected = "version contains unsafe characters")] + fn test_generate_hook_script_rejects_unsafe_version() { + generate_hook_script("/usr/local/bin/skim", "1.0.0$(evil)", "test-agent"); + } + + #[test] + #[should_panic(expected = "agent_cli_name contains unsafe characters")] + fn test_generate_hook_script_rejects_unsafe_agent_name() { + generate_hook_script("/usr/local/bin/skim", "1.0.0", "agent;rm -rf /"); + } + + #[test] + fn test_generate_hook_script_accepts_path_with_spaces() { + // Spaces are safe because binary_path is double-quoted in the script + let script = generate_hook_script("/Users/my user/bin/skim", "1.0.0", "test-agent"); + assert!(script.contains("exec \"/Users/my user/bin/skim\"")); + } +} diff --git a/crates/rskim/src/cmd/hooks/opencode.rs b/crates/rskim/src/cmd/hooks/opencode.rs new file mode 100644 index 0000000..5e9678e --- /dev/null +++ b/crates/rskim/src/cmd/hooks/opencode.rs @@ -0,0 +1,106 @@ +//! OpenCode hook protocol implementation. +//! +//! OpenCode uses a TypeScript plugin model -- there is no shell hook equivalent. +//! This implementation provides awareness-only support: it registers the agent +//! as recognized but does not intercept tool calls. + +use super::{HookProtocol, HookSupport}; +use crate::cmd::session::AgentKind; + +/// OpenCode awareness-only hook. +/// +/// OpenCode has no shell hook mechanism, so all methods are no-ops. +/// The provider exists so that `skim init --agent opencode` gives +/// a clear "awareness-only" message instead of "unknown agent". 
+pub(crate) struct OpenCodeHook; + +impl HookProtocol for OpenCodeHook { + fn agent_kind(&self) -> AgentKind { + AgentKind::OpenCode + } + + fn hook_support(&self) -> HookSupport { + HookSupport::AwarenessOnly + } + + fn parse_input(&self, _json: &serde_json::Value) -> Option { + None + } + + fn format_response(&self, _rewritten_command: &str) -> serde_json::Value { + serde_json::Value::Null + } + + fn generate_script(&self, _binary_path: &str, _version: &str) -> String { + String::new() + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::cmd::hooks::{InstallOpts, UninstallOpts}; + + fn hook() -> OpenCodeHook { + OpenCodeHook + } + + #[test] + fn test_opencode_hook_support_is_awareness() { + assert_eq!(hook().hook_support(), HookSupport::AwarenessOnly); + } + + #[test] + fn test_opencode_parse_input_returns_none() { + let json = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + assert!(hook().parse_input(&json).is_none()); + } + + #[test] + fn test_opencode_format_response_returns_null() { + let response = hook().format_response("skim test cargo"); + assert_eq!(response, serde_json::Value::Null); + } + + #[test] + fn test_opencode_agent_kind() { + assert_eq!(hook().agent_kind(), AgentKind::OpenCode); + } + + #[test] + fn test_opencode_generate_script_empty() { + let script = hook().generate_script("/usr/local/bin/skim", "1.0.0"); + assert!(script.is_empty()); + } + + #[test] + fn test_opencode_install_noop() { + let opts = InstallOpts { + binary_path: "/usr/local/bin/skim".into(), + version: "1.0.0".into(), + config_dir: "/tmp/.opencode".into(), + project_scope: false, + dry_run: false, + }; + let result = hook().install(&opts).unwrap(); + assert!(result.script_path.is_none()); + assert!(!result.config_patched); + } + + #[test] + fn 
test_opencode_uninstall_noop() { + let opts = UninstallOpts { + config_dir: "/tmp/.opencode".into(), + force: false, + }; + assert!(hook().uninstall(&opts).is_ok()); + } +} diff --git a/crates/rskim/src/cmd/init.rs b/crates/rskim/src/cmd/init.rs deleted file mode 100644 index 2304d4c..0000000 --- a/crates/rskim/src/cmd/init.rs +++ /dev/null @@ -1,1047 +0,0 @@ -//! Interactive hook installation for Claude Code (#44) -//! -//! `skim init` installs skim as a Claude Code PreToolUse hook, enabling -//! automatic command rewriting. Supports global (`~/.claude/`) and project-level -//! (`.claude/`) installation with idempotent, atomic writes. -//! -//! The hook script calls `skim rewrite --hook` which reads Claude Code's -//! PreToolUse JSON, rewrites matched commands, and emits `updatedInput`. -//! -//! SECURITY INVARIANT: The hook NEVER sets `permissionDecision`. Unlike -//! competitors, our hook only sets `updatedInput` and lets Claude Code's -//! permission system evaluate independently. - -use std::io::{self, IsTerminal, Write}; -#[cfg(unix)] -use std::os::unix::fs::PermissionsExt; -use std::path::{Path, PathBuf}; -use std::process::ExitCode; - -// ============================================================================ -// Constants -// ============================================================================ - -const HOOK_SCRIPT_NAME: &str = "skim-rewrite.sh"; -const SETTINGS_FILE: &str = "settings.json"; -const SETTINGS_BACKUP: &str = "settings.json.bak"; - -// ============================================================================ -// Public entry points -// ============================================================================ - -/// Run the `init` subcommand. -pub(crate) fn run(args: &[String]) -> anyhow::Result { - // Unix-only guard - if !cfg!(unix) { - anyhow::bail!( - "skim init is only supported on Unix systems (macOS, Linux)\n\ - Windows support is planned for a future release." 
- ); - } - - // Handle --help / -h - if args.iter().any(|a| matches!(a.as_str(), "--help" | "-h")) { - print_help(); - return Ok(ExitCode::SUCCESS); - } - - // Parse flags - let flags = parse_flags(args)?; - - // Non-TTY detection (B3) - if !flags.yes && !io::stdin().is_terminal() { - eprintln!("error: skim init requires an interactive terminal"); - eprintln!("hint: use --yes for non-interactive mode (e.g., CI)"); - return Ok(ExitCode::FAILURE); - } - - if flags.uninstall { - return run_uninstall(&flags); - } - - run_install(&flags) -} - -/// Build the clap `Command` definition for shell completions. -pub(super) fn command() -> clap::Command { - clap::Command::new("init") - .about("Install skim as a Claude Code hook") - .arg( - clap::Arg::new("global") - .long("global") - .action(clap::ArgAction::SetTrue) - .help("Install to user-level ~/.claude/ (default)"), - ) - .arg( - clap::Arg::new("project") - .long("project") - .action(clap::ArgAction::SetTrue) - .help("Install to .claude/ in current directory"), - ) - .arg( - clap::Arg::new("yes") - .long("yes") - .short('y') - .action(clap::ArgAction::SetTrue) - .help("Non-interactive mode (skip prompts)"), - ) - .arg( - clap::Arg::new("dry-run") - .long("dry-run") - .action(clap::ArgAction::SetTrue) - .help("Print actions without writing"), - ) - .arg( - clap::Arg::new("uninstall") - .long("uninstall") - .action(clap::ArgAction::SetTrue) - .help("Remove hook and clean up"), - ) -} - -// ============================================================================ -// Flag parsing -// ============================================================================ - -#[derive(Debug)] -struct InitFlags { - project: bool, - yes: bool, - dry_run: bool, - uninstall: bool, -} - -fn parse_flags(args: &[String]) -> anyhow::Result { - let mut project = false; - let mut yes = false; - let mut dry_run = false; - let mut uninstall = false; - - for arg in args { - match arg.as_str() { - "--global" => { /* default, no-op */ } - "--project" 
=> project = true, - "--yes" | "-y" => yes = true, - "--dry-run" => dry_run = true, - "--uninstall" => uninstall = true, - other => { - anyhow::bail!( - "unknown flag: '{other}'\n\ - Run 'skim init --help' for usage information" - ); - } - } - } - - Ok(InitFlags { - project, - yes, - dry_run, - uninstall, - }) -} - -// ============================================================================ -// State detection (B5) -// ============================================================================ - -struct DetectedState { - skim_binary: PathBuf, - skim_version: String, - config_dir: PathBuf, - settings_path: PathBuf, - settings_exists: bool, - hook_installed: bool, - hook_version: Option, - marketplace_installed: bool, - /// If installing to one scope and the other scope also has a hook - dual_scope_warning: Option, -} - -fn detect_state(flags: &InitFlags) -> anyhow::Result { - let skim_binary = std::env::current_exe()?; - let skim_version = env!("CARGO_PKG_VERSION").to_string(); - let config_dir = resolve_config_dir(flags.project)?; - let settings_path = config_dir.join(SETTINGS_FILE); - let settings_exists = settings_path.exists(); - - let mut hook_installed = false; - let mut hook_version = None; - let mut marketplace_installed = false; - - if let Some(json) = read_settings_json(&settings_path) { - if let Some(arr) = json - .get("hooks") - .and_then(|h| h.get("PreToolUse")) - .and_then(|v| v.as_array()) - { - for entry in arr { - if has_skim_hook_entry(entry) { - hook_installed = true; - hook_version = extract_hook_version_from_entry(entry, &config_dir); - } - } - } - if json - .get("extraKnownMarketplaces") - .and_then(|m| m.get("skim")) - .is_some() - { - marketplace_installed = true; - } - } - - // Dual-scope check (B5) - let dual_scope_warning = check_dual_scope(flags)?; - - Ok(DetectedState { - skim_binary, - skim_version, - config_dir, - settings_path, - settings_exists, - hook_installed, - hook_version, - marketplace_installed, - dual_scope_warning, - 
}) -} - -fn check_dual_scope(flags: &InitFlags) -> anyhow::Result> { - let other_dir = if flags.project { - // Installing project-level, check global - resolve_config_dir(false)? - } else { - // Installing global, check project - match std::env::current_dir() { - Ok(cwd) => cwd.join(".claude"), - Err(_) => return Ok(None), - } - }; - - let other_settings = other_dir.join(SETTINGS_FILE); - let has_hook = read_settings_json(&other_settings) - .and_then(|json| { - json.get("hooks")? - .get("PreToolUse")? - .as_array() - .map(|arr| arr.iter().any(has_skim_hook_entry)) - }) - .unwrap_or(false); - - if !has_hook { - return Ok(None); - } - - let scope = if flags.project { - "globally" - } else { - "in project" - }; - let uninstall_scope = if flags.project { - "--global" - } else { - "--project" - }; - let path = other_settings.display(); - Ok(Some(format!( - "skim hook is also installed {scope} ({path})\n \ - Both hooks will fire, but this is harmless -- the second is a no-op.\n \ - To remove: skim init {uninstall_scope} --uninstall" - ))) -} - -/// Maximum settings.json size we'll read (10 MB). Anything larger is almost -/// certainly not a real Claude Code settings file and could cause OOM. -const MAX_SETTINGS_SIZE: u64 = 10 * 1024 * 1024; - -/// Read and parse a settings.json file, returning `None` on any failure. -/// -/// Rejects files larger than [`MAX_SETTINGS_SIZE`] to prevent OOM from -/// maliciously crafted settings files (especially in `--project` mode where -/// the file is under repository control). -fn read_settings_json(path: &Path) -> Option { - let metadata = std::fs::metadata(path).ok()?; - if metadata.len() > MAX_SETTINGS_SIZE { - return None; - } - let contents = std::fs::read_to_string(path).ok()?; - serde_json::from_str(&contents).ok() -} - -/// Check if a PreToolUse entry contains a skim hook (substring match on "skim-rewrite"). 
-fn has_skim_hook_entry(entry: &serde_json::Value) -> bool { - entry - .get("hooks") - .and_then(|h| h.as_array()) - .is_some_and(|hooks| { - hooks.iter().any(|hook| { - hook.get("command") - .and_then(|c| c.as_str()) - .is_some_and(|cmd| cmd.contains("skim-rewrite")) - }) - }) -} - -/// Try to extract the skim version from the hook script referenced in a settings entry. -/// -/// SECURITY: Validates that the resolved script path is within the expected -/// `{config_dir}/hooks/` directory to prevent arbitrary file reads via -/// attacker-controlled settings.json in `--project` mode. -fn extract_hook_version_from_entry(entry: &serde_json::Value, config_dir: &Path) -> Option { - let hooks_dir = config_dir.join("hooks"); - let hooks = entry.get("hooks")?.as_array()?; - for hook in hooks { - let cmd = hook.get("command")?.as_str()?; - if cmd.contains("skim-rewrite") { - // Try reading the script file - let script_path = if cmd.starts_with('/') || cmd.starts_with('.') { - PathBuf::from(cmd) - } else { - hooks_dir.join(HOOK_SCRIPT_NAME) - }; - - // Validate the resolved path is within the expected hooks directory. - // canonicalize() resolves symlinks and ".." to get the real path. - let canonical = std::fs::canonicalize(&script_path).ok()?; - let canonical_hooks_dir = std::fs::canonicalize(&hooks_dir).ok()?; - if !canonical.starts_with(&canonical_hooks_dir) { - // Path escapes the hooks directory -- skip version extraction. 
- return None; - } - - if let Ok(contents) = std::fs::read_to_string(&canonical) { - for line in contents.lines() { - if let Some(ver) = line.strip_prefix("# skim-hook v").or_else(|| { - line.strip_prefix("export SKIM_HOOK_VERSION=\"") - .and_then(|s| s.strip_suffix('"')) - }) { - return Some(ver.to_string()); - } - } - } - } - } - None -} - -// ============================================================================ -// Config directory resolution (B6) -// ============================================================================ - -/// Remove skim hook entries and marketplace registration from a settings.json value. -/// -/// 1. Removes skim entries from `hooks.PreToolUse` array -/// 2. Cleans up empty arrays/objects -/// 3. Removes `skim` from `extraKnownMarketplaces` -fn remove_skim_from_settings(settings: &mut serde_json::Value) { - let obj = match settings.as_object_mut() { - Some(obj) => obj, - None => return, - }; - - // Remove skim from PreToolUse - let hooks_empty = obj - .get_mut("hooks") - .and_then(|h| h.as_object_mut()) - .map(|hooks_obj| { - let ptu_empty = hooks_obj - .get_mut("PreToolUse") - .and_then(|ptu| ptu.as_array_mut()) - .map(|arr| { - arr.retain(|entry| !has_skim_hook_entry(entry)); - arr.is_empty() - }) - .unwrap_or(false); - if ptu_empty { - hooks_obj.remove("PreToolUse"); - } - hooks_obj.is_empty() - }) - .unwrap_or(false); - if hooks_empty { - obj.remove("hooks"); - } - - // Remove from extraKnownMarketplaces - let mkts_empty = obj - .get_mut("extraKnownMarketplaces") - .and_then(|m| m.as_object_mut()) - .map(|mkts_obj| { - mkts_obj.remove("skim"); - mkts_obj.is_empty() - }) - .unwrap_or(false); - if mkts_empty { - obj.remove("extraKnownMarketplaces"); - } -} - -/// Resolve a symlink to its absolute target path. -/// -/// `read_link()` can return relative paths. This helper joins the relative -/// target with the symlink's parent directory, then canonicalizes to get an -/// absolute path. 
-fn resolve_symlink(link: &Path) -> anyhow::Result { - let target = std::fs::read_link(link)?; - if target.is_absolute() { - Ok(target) - } else { - let parent = link.parent().ok_or_else(|| { - anyhow::anyhow!("symlink has no parent directory: {}", link.display()) - })?; - let resolved = parent.join(&target); - std::fs::canonicalize(&resolved).map_err(|e| { - anyhow::anyhow!( - "failed to resolve symlink {} -> {}: {}", - link.display(), - resolved.display(), - e - ) - }) - } -} - -fn resolve_config_dir(project: bool) -> anyhow::Result { - if project { - Ok(std::env::current_dir()?.join(".claude")) - } else if let Ok(dir) = std::env::var("CLAUDE_CONFIG_DIR") { - Ok(PathBuf::from(dir)) - } else { - Ok(dirs::home_dir() - .ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))? - .join(".claude")) - } -} - -// ============================================================================ -// Install flow -// ============================================================================ - -/// Resolved install options from interactive prompts or --yes defaults. -struct InstallOptions { - /// Whether to use project scope (overrides flags.project when user selects it interactively). - project: bool, - /// Whether to install the marketplace entry. - install_marketplace: bool, - /// Whether confirmation was already handled by the prompting phase. - skip_confirmation: bool, -} - -/// Prompt the user for install options (scope and marketplace). -/// -/// In non-interactive mode (--yes), returns defaults immediately. -/// Returns `None` if the user chose project scope interactively (requires re-detection). 
-fn prompt_install_options( - flags: &InitFlags, - state: &DetectedState, -) -> anyhow::Result { - if flags.yes { - return Ok(InstallOptions { - project: flags.project, - install_marketplace: true, - skip_confirmation: true, - }); - } - - let mut use_project = flags.project; - let mut skip_confirmation = false; - - // Scope prompt (informational -- scope is already determined by --project flag) - if !flags.project { - println!(" ? Where should skim install the hook?"); - println!(" [1] Global (~/.claude/settings.json) [recommended]"); - println!(" [2] Project (.claude/settings.json)"); - let choice = prompt_choice(" Choice [1]: ", 1, &[1, 2])?; - if choice == 2 { - println!(); - println!(" Tip: use `skim init --project` to skip this prompt next time."); - use_project = true; - // User already made a deliberate scope choice -- skip confirmation later - skip_confirmation = true; - } - println!(); - } - - // Plugin prompt - let install_marketplace = if !state.marketplace_installed { - println!(" ? Install the Skimmer plugin? (codebase orientation agent)"); - println!(" Adds /skim command and auto-orientation for new codebases"); - println!(" [1] Yes [recommended]"); - println!(" [2] No"); - let choice = prompt_choice(" Choice [1]: ", 1, &[1, 2])?; - println!(); - choice == 1 - } else { - true - }; - - Ok(InstallOptions { - project: use_project, - install_marketplace, - skip_confirmation, - }) -} - -fn run_install(flags: &InitFlags) -> anyhow::Result { - let state = detect_state(flags)?; - - // Print header - println!(); - println!(" skim init -- Claude Code integration setup"); - println!(); - - // Print detected state - print_detected_state(&state); - - // Already up to date check - if state.hook_installed - && state.hook_version.as_deref() == Some(&state.skim_version) - && state.marketplace_installed - { - println!(" Already up to date. 
Nothing to do."); - println!(); - return Ok(ExitCode::SUCCESS); - } - - // Dual-scope warning - if let Some(ref warning) = state.dual_scope_warning { - println!(" WARNING: {warning}"); - println!(); - } - - // Prompt for options (or use defaults for --yes) - let options = prompt_install_options(flags, &state)?; - - // If user changed scope interactively, re-detect state with the new scope - let (state, flags_override); - if options.project != flags.project { - flags_override = InitFlags { - project: options.project, - yes: flags.yes, - dry_run: flags.dry_run, - uninstall: false, - }; - state = detect_state(&flags_override)?; - } else { - flags_override = InitFlags { - project: flags.project, - yes: flags.yes, - dry_run: flags.dry_run, - uninstall: false, - }; - state = detect_state(&flags_override)?; - } - - // Print summary - let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME); - println!(" Summary:"); - if !state.hook_installed || state.hook_version.as_deref() != Some(&state.skim_version) { - println!(" * Create hook script: {}", hook_script_path.display()); - println!( - " * Patch settings: {} (add PreToolUse hook)", - state.settings_path.display() - ); - } - if options.install_marketplace && !state.marketplace_installed { - println!(" * Register marketplace: skim (dean0x/skim)"); - } - println!(); - - // Confirmation (skip if user already confirmed via scope change or --yes) - if !flags.yes && !options.skip_confirmation && !confirm_proceed()? { - println!(" Cancelled."); - return Ok(ExitCode::SUCCESS); - } - - if flags_override.dry_run { - print_dry_run_actions(&state, options.install_marketplace); - return Ok(ExitCode::SUCCESS); - } - - // Execute installation - execute_install(&state, options.install_marketplace)?; - - println!(); - println!(" Done! 
skim is now active in Claude Code."); - println!(); - if options.install_marketplace { - println!(" Next step -- install the Skimmer plugin in Claude Code:"); - println!(" /install skimmer@skim"); - println!(); - } - - Ok(ExitCode::SUCCESS) -} - -/// Print the detected state summary to stdout. -fn print_detected_state(state: &DetectedState) { - println!(" Checking current state..."); - println!( - " {} skim binary: {} (v{})", - check_mark(true), - state.skim_binary.display(), - state.skim_version - ); - - let config_label = if state.settings_exists { - "exists" - } else { - "will be created" - }; - println!( - " {} Claude config: {} ({})", - check_mark(state.settings_exists), - state.settings_path.display(), - config_label - ); - - let hook_label = if state.hook_installed { - match &state.hook_version { - Some(v) if v == &state.skim_version => format!("installed (v{v})"), - Some(v) => format!("installed (v{v} -> v{} available)", state.skim_version), - None => "installed".to_string(), - } - } else { - "not installed".to_string() - }; - println!( - " {} Hook: {}", - check_mark(state.hook_installed), - hook_label - ); - println!(); -} - -fn execute_install(state: &DetectedState, install_marketplace: bool) -> anyhow::Result<()> { - // B7: Create hook script - create_hook_script(state)?; - - // B8: Patch settings.json - patch_settings(state, install_marketplace)?; - - Ok(()) -} - -// ============================================================================ -// Hook script generation (B7) -// ============================================================================ - -fn create_hook_script(state: &DetectedState) -> anyhow::Result<()> { - let hooks_dir = state.config_dir.join("hooks"); - let script_path = hooks_dir.join(HOOK_SCRIPT_NAME); - - // Create hooks directory if needed - if !hooks_dir.exists() { - std::fs::create_dir_all(&hooks_dir)?; - #[cfg(unix)] - { - let perms = std::fs::Permissions::from_mode(0o755); - std::fs::set_permissions(&hooks_dir, perms)?; - } 
- } - - // Check if existing script has same version (idempotent) - if script_path.exists() { - if let Ok(contents) = std::fs::read_to_string(&script_path) { - let version_line = format!("# skim-hook v{}", state.skim_version); - if contents.contains(&version_line) { - println!( - " {} Skipped: {} (already v{})", - check_mark(true), - script_path.display(), - state.skim_version - ); - return Ok(()); - } - // Different version — will overwrite - if let Some(old_ver) = &state.hook_version { - println!( - " {} Updated: {} (v{} -> v{})", - check_mark(true), - script_path.display(), - old_ver, - state.skim_version - ); - } else { - println!(" {} Updated: {}", check_mark(true), script_path.display()); - } - } - } else { - println!(" {} Created: {}", check_mark(true), script_path.display()); - } - - // Generate script content - // Binary path is quoted to handle spaces - let binary_path = state.skim_binary.display(); - let script_content = format!( - "#!/usr/bin/env bash\n\ - # skim-hook v{version}\n\ - # Generated by: skim init -- do not edit manually\n\ - export SKIM_HOOK_VERSION=\"{version}\"\n\ - exec \"{binary_path}\" rewrite --hook\n", - version = state.skim_version, - ); - - // Atomic write: write to tmp, then rename to final path. - // A crash mid-write produces a tmp file instead of a truncated script. 
- let tmp_path = hooks_dir.join(format!("{HOOK_SCRIPT_NAME}.tmp")); - std::fs::write(&tmp_path, script_content)?; - - // Set executable permissions on the tmp file before renaming - #[cfg(unix)] - { - let perms = std::fs::Permissions::from_mode(0o755); - std::fs::set_permissions(&tmp_path, perms)?; - } - - std::fs::rename(&tmp_path, &script_path)?; - - Ok(()) -} - -// ============================================================================ -// Settings.json patching (B8) -// ============================================================================ - -fn patch_settings(state: &DetectedState, install_marketplace: bool) -> anyhow::Result<()> { - // Ensure config dir exists - if !state.config_dir.exists() { - std::fs::create_dir_all(&state.config_dir)?; - } - - // Resolve symlinks before writing (don't replace symlink with regular file) - let real_settings_path = if state.settings_path.is_symlink() { - resolve_symlink(&state.settings_path)? - } else { - state.settings_path.clone() - }; - - // Read existing settings or start fresh. - // Re-check file existence here instead of using cached `state.settings_exists` - // to avoid TOCTOU race between detect_state() and this write path. 
- let settings_exists_now = real_settings_path.exists(); - let mut settings: serde_json::Value = if settings_exists_now { - // Guard against oversized files (e.g., attacker-controlled .claude/settings.json) - let file_size = std::fs::metadata(&real_settings_path)?.len(); - if file_size > MAX_SETTINGS_SIZE { - anyhow::bail!( - "settings.json is too large ({} bytes, max {} bytes): {}\n\ - hint: This does not look like a valid Claude Code settings file", - file_size, - MAX_SETTINGS_SIZE, - real_settings_path.display() - ); - } - let contents = std::fs::read_to_string(&real_settings_path)?; - if contents.trim().is_empty() { - // Empty file — treat as {} - serde_json::Value::Object(serde_json::Map::new()) - } else { - serde_json::from_str(&contents).map_err(|e| { - anyhow::anyhow!( - "Failed to parse {}: {}\n\ - hint: Fix the JSON manually, then re-run `skim init`", - real_settings_path.display(), - e - ) - })? - } - } else { - serde_json::Value::Object(serde_json::Map::new()) - }; - - let obj = settings - .as_object_mut() - .ok_or_else(|| anyhow::anyhow!("settings.json root is not an object"))?; - - // Back up existing file (use fresh check, not cached state) - if settings_exists_now { - let backup_path = state.config_dir.join(SETTINGS_BACKUP); - std::fs::copy(&real_settings_path, &backup_path)?; - println!( - " {} Backed up: {} -> {}", - check_mark(true), - state.settings_path.display(), - SETTINGS_BACKUP - ); - } - - // Build the hook script path - let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME); - let hook_script_str = hook_script_path.display().to_string(); - - // Ensure hooks.PreToolUse array exists - let hooks = obj - .entry("hooks") - .or_insert_with(|| serde_json::Value::Object(serde_json::Map::new())) - .as_object_mut() - .ok_or_else(|| anyhow::anyhow!("settings.json 'hooks' is not an object"))?; - - let pre_tool_use = hooks - .entry("PreToolUse") - .or_insert_with(|| serde_json::Value::Array(Vec::new())) - .as_array_mut() - 
.ok_or_else(|| anyhow::anyhow!("settings.json 'hooks.PreToolUse' is not an array"))?; - - // Search for existing skim entry and remove it (to update in place) - pre_tool_use.retain(|entry| !has_skim_hook_entry(entry)); - - // Build the new hook entry - let hook_entry = serde_json::json!({ - "matcher": "Bash", - "hooks": [{ - "type": "command", - "command": hook_script_str, - "timeout": 5 - }] - }); - pre_tool_use.push(hook_entry); - - // Add marketplace (if opted in) - if install_marketplace { - let marketplaces = obj - .entry("extraKnownMarketplaces") - .or_insert_with(|| serde_json::Value::Object(serde_json::Map::new())) - .as_object_mut() - .ok_or_else(|| { - anyhow::anyhow!("settings.json 'extraKnownMarketplaces' is not an object") - })?; - - marketplaces.insert( - "skim".to_string(), - serde_json::json!({"source": {"source": "github", "repo": "dean0x/skim"}}), - ); - } - - // Atomic write: write to tmp, then rename - let pretty = serde_json::to_string_pretty(&settings)?; - let tmp_path = real_settings_path.with_extension("json.tmp"); - std::fs::write(&tmp_path, format!("{pretty}\n"))?; - std::fs::rename(&tmp_path, &real_settings_path)?; - - println!( - " {} Patched: {} (PreToolUse hook added)", - check_mark(true), - state.settings_path.display() - ); - - if install_marketplace { - println!( - " {} Registered: skim marketplace in {}", - check_mark(true), - SETTINGS_FILE - ); - } - - Ok(()) -} - -// ============================================================================ -// Uninstall flow (B10) -// ============================================================================ - -fn run_uninstall(flags: &InitFlags) -> anyhow::Result { - let config_dir = resolve_config_dir(flags.project)?; - let settings_path = config_dir.join(SETTINGS_FILE); - let hook_script_path = config_dir.join("hooks").join(HOOK_SCRIPT_NAME); - - // Check if anything is installed - let settings_has_hook = read_settings_json(&settings_path) - .and_then(|json| { - json.get("hooks")? 
- .get("PreToolUse")? - .as_array() - .map(|arr| arr.iter().any(has_skim_hook_entry)) - }) - .unwrap_or(false); - - let script_exists = hook_script_path.exists(); - - if !settings_has_hook && !script_exists { - println!(" skim hook not found. Nothing to uninstall."); - return Ok(ExitCode::SUCCESS); - } - - // Interactive confirmation - if !flags.yes { - println!(); - println!(" skim init --uninstall"); - println!(); - if settings_has_hook { - println!(" * Remove hook entry from {}", settings_path.display()); - println!(" * Remove skim from extraKnownMarketplaces"); - } - if script_exists { - println!(" * Delete {}", hook_script_path.display()); - } - println!(); - if !confirm_proceed()? { - println!(" Cancelled."); - return Ok(ExitCode::SUCCESS); - } - } - - if flags.dry_run { - if settings_has_hook { - println!( - " [dry-run] Would remove hook entry from {}", - settings_path.display() - ); - println!(" [dry-run] Would remove skim from extraKnownMarketplaces"); - } - if script_exists { - println!(" [dry-run] Would delete {}", hook_script_path.display()); - } - return Ok(ExitCode::SUCCESS); - } - - // Remove from settings.json - if settings_has_hook { - // Resolve symlinks - let real_path = if settings_path.is_symlink() { - resolve_symlink(&settings_path)? 
- } else { - settings_path.clone() - }; - - // Guard against oversized files - let file_size = std::fs::metadata(&real_path)?.len(); - if file_size > MAX_SETTINGS_SIZE { - anyhow::bail!( - "settings.json is too large ({} bytes, max {} bytes): {}\n\ - hint: This does not look like a valid Claude Code settings file", - file_size, - MAX_SETTINGS_SIZE, - real_path.display() - ); - } - let contents = std::fs::read_to_string(&real_path)?; - let mut settings: serde_json::Value = serde_json::from_str(&contents)?; - - remove_skim_from_settings(&mut settings); - - // Atomic write - let pretty = serde_json::to_string_pretty(&settings)?; - let tmp_path = real_path.with_extension("json.tmp"); - std::fs::write(&tmp_path, format!("{pretty}\n"))?; - std::fs::rename(&tmp_path, &real_path)?; - - println!( - " {} Removed: hook entry from {}", - check_mark(true), - settings_path.display() - ); - } - - // Delete hook script - if script_exists { - std::fs::remove_file(&hook_script_path)?; - println!( - " {} Deleted: {}", - check_mark(true), - hook_script_path.display() - ); - } - - println!(); - println!(" skim hook has been uninstalled."); - println!(); - - Ok(ExitCode::SUCCESS) -} - -// ============================================================================ -// Dry-run output (B11) -// ============================================================================ - -fn print_dry_run_actions(state: &DetectedState, install_marketplace: bool) { - let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME); - - println!(" [dry-run] Would create: {}", hook_script_path.display()); - if state.settings_exists { - println!( - " [dry-run] Would back up: {} -> {}", - state.settings_path.display(), - SETTINGS_BACKUP - ); - } - println!( - " [dry-run] Would patch: {} (add PreToolUse hook)", - state.settings_path.display() - ); - if install_marketplace { - println!( - " [dry-run] Would register: skim marketplace in {}", - SETTINGS_FILE - ); - } -} - -// 
============================================================================ -// Interactive prompt helpers -// ============================================================================ - -fn prompt_choice(prompt: &str, default: u32, valid: &[u32]) -> anyhow::Result { - print!("{prompt}"); - io::stdout().flush()?; - let mut input = String::new(); - io::stdin().read_line(&mut input)?; - let trimmed = input.trim(); - if trimmed.is_empty() { - return Ok(default); - } - match trimmed.parse::() { - Ok(n) if valid.contains(&n) => Ok(n), - _ => Ok(default), - } -} - -/// Prompt the user with "Proceed? [Y/n]" and return `true` if confirmed. -fn confirm_proceed() -> anyhow::Result { - print!(" ? Proceed? [Y/n] "); - io::stdout().flush()?; - let mut input = String::new(); - io::stdin().read_line(&mut input)?; - let trimmed = input.trim().to_lowercase(); - let confirmed = trimmed.is_empty() || trimmed == "y" || trimmed == "yes"; - if confirmed { - println!(); - } - Ok(confirmed) -} - -fn check_mark(ok: bool) -> &'static str { - if ok { - "\x1b[32m+\x1b[0m" - } else { - "\x1b[31m-\x1b[0m" - } -} - -// ============================================================================ -// Help text -// ============================================================================ - -fn print_help() { - println!("skim init"); - println!(); - println!(" Install skim as a Claude Code hook for automatic command rewriting"); - println!(); - println!("Usage: skim init [OPTIONS]"); - println!(); - println!("Options:"); - println!(" --global Install to user-level ~/.claude/ (default)"); - println!(" --project Install to .claude/ in current directory"); - println!(" --yes, -y Non-interactive mode (skip prompts)"); - println!(" --dry-run Print actions without writing"); - println!(" --uninstall Remove hook and clean up"); - println!(" --help, -h Print help information"); - println!(); - println!("Examples:"); - println!(" skim init Interactive setup (recommended)"); - println!(" skim init 
--yes Non-interactive with defaults"); - println!(" skim init --project --yes Install project-level hook"); - println!(" skim init --uninstall Remove skim hook"); - println!(" skim init --dry-run Preview actions without writing"); -} diff --git a/crates/rskim/src/cmd/init/flags.rs b/crates/rskim/src/cmd/init/flags.rs new file mode 100644 index 0000000..7dde543 --- /dev/null +++ b/crates/rskim/src/cmd/init/flags.rs @@ -0,0 +1,135 @@ +//! Flag parsing for `skim init`. + +use crate::cmd::session::AgentKind; + +/// Parsed command-line flags for the init subcommand. +#[derive(Debug)] +pub(super) struct InitFlags { + pub(super) project: bool, + pub(super) yes: bool, + pub(super) dry_run: bool, + pub(super) uninstall: bool, + pub(super) force: bool, + /// Target agent for installation (default: ClaudeCode) + pub(super) agent: AgentKind, +} + +pub(super) fn parse_flags(args: &[String]) -> anyhow::Result { + let mut project = false; + let mut yes = false; + let mut dry_run = false; + let mut uninstall = false; + let mut force = false; + let mut agent = AgentKind::ClaudeCode; + + let mut i = 0; + while i < args.len() { + match args[i].as_str() { + "--global" => { /* default, no-op */ } + "--project" => project = true, + "--yes" | "-y" => yes = true, + "--dry-run" => dry_run = true, + "--uninstall" => uninstall = true, + "--force" => force = true, + "--agent" => { + i += 1; + if i >= args.len() { + anyhow::bail!( + "missing value for --agent\n\ + Supported: {}", + AgentKind::all_supported() + .iter() + .map(|a| a.cli_name()) + .collect::>() + .join(", ") + ); + } + agent = AgentKind::parse_cli_arg(&args[i])?; + } + other => { + anyhow::bail!( + "unknown flag: '{other}'\n\ + Run 'skim init --help' for usage information" + ); + } + } + i += 1; + } + + Ok(InitFlags { + project, + yes, + dry_run, + uninstall, + force, + agent, + }) +} + +// ============================================================================ +// Unit tests +// 
============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_flags_default_agent_is_claude_code() { + let flags = parse_flags(&["--yes".to_string()]).unwrap(); + assert_eq!(flags.agent, AgentKind::ClaudeCode); + } + + #[test] + fn test_parse_flags_agent_cursor() { + let flags = parse_flags(&[ + "--yes".to_string(), + "--agent".to_string(), + "cursor".to_string(), + ]) + .unwrap(); + assert_eq!(flags.agent, AgentKind::Cursor); + } + + #[test] + fn test_parse_flags_agent_gemini() { + let flags = parse_flags(&[ + "--agent".to_string(), + "gemini".to_string(), + "--yes".to_string(), + ]) + .unwrap(); + assert_eq!(flags.agent, AgentKind::GeminiCli); + } + + #[test] + fn test_parse_flags_agent_unknown_errors() { + let result = parse_flags(&["--agent".to_string(), "unknown-agent".to_string()]); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("unknown agent"), + "error should mention unknown agent: {err}" + ); + } + + #[test] + fn test_parse_flags_agent_missing_value_errors() { + let result = parse_flags(&["--agent".to_string()]); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("missing value"), + "error should mention missing value: {err}" + ); + } + + #[test] + fn test_parse_flags_backward_compat_no_agent() { + // No --agent flag should default to ClaudeCode + let flags = parse_flags(&["--yes".to_string(), "--dry-run".to_string()]).unwrap(); + assert_eq!(flags.agent, AgentKind::ClaudeCode); + assert!(flags.yes); + assert!(flags.dry_run); + } +} diff --git a/crates/rskim/src/cmd/init/helpers.rs b/crates/rskim/src/cmd/init/helpers.rs new file mode 100644 index 0000000..2db2b37 --- /dev/null +++ b/crates/rskim/src/cmd/init/helpers.rs @@ -0,0 +1,233 @@ +//! Shared helper functions and constants for `skim init`. 
+ +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; + +// ============================================================================ +// Shared constants +// ============================================================================ + +pub(super) const HOOK_SCRIPT_NAME: &str = "skim-rewrite.sh"; +pub(super) const SETTINGS_FILE: &str = "settings.json"; +pub(super) const SETTINGS_BACKUP: &str = "settings.json.bak"; + +// ============================================================================ +// Config directory resolution (B6) +// ============================================================================ + +/// Resolve the config directory for a specific agent. +/// +/// For Claude Code: `CLAUDE_CONFIG_DIR` env > `~/.claude/` (or `.claude/` with --project) +/// For Cursor: `~/.cursor/` (macOS: `~/Library/Application Support/Cursor/`) +/// For Gemini: `~/.gemini/` +/// For Copilot: `~/.github/` +/// For others: falls back to `~/.{agent_cli_name}/` +pub(crate) fn resolve_config_dir_for_agent( + project: bool, + agent: crate::cmd::session::AgentKind, +) -> anyhow::Result { + use crate::cmd::session::AgentKind; + + if project { + return Ok(std::env::current_dir()?.join(agent.dot_dir_name())); + } + + // Check agent-specific env override + if agent == AgentKind::ClaudeCode { + if let Ok(dir) = std::env::var("CLAUDE_CONFIG_DIR") { + return Ok(PathBuf::from(dir)); + } + } + + let home = + dirs::home_dir().ok_or_else(|| anyhow::anyhow!("Could not determine home directory"))?; + + Ok(agent.config_dir(&home)) +} + +/// Resolve a symlink to its absolute target path. +/// +/// `read_link()` can return relative paths. This helper joins the relative +/// target with the symlink's parent directory, then canonicalizes to get an +/// absolute path. 
+pub(super) fn resolve_symlink(link: &Path) -> anyhow::Result<PathBuf> {
+    let target = std::fs::read_link(link)?;
+    if target.is_absolute() {
+        Ok(target)
+    } else {
+        let parent = link.parent().ok_or_else(|| {
+            anyhow::anyhow!("symlink has no parent directory: {}", link.display())
+        })?;
+        let resolved = parent.join(&target);
+        std::fs::canonicalize(&resolved).map_err(|e| {
+            anyhow::anyhow!(
+                "failed to resolve symlink {} -> {}: {}",
+                link.display(),
+                resolved.display(),
+                e
+            )
+        })
+    }
+}
+
+// ============================================================================
+// Settings I/O helpers (shared by install and uninstall)
+// ============================================================================
+
+/// Resolve symlinks on the settings path, returning the original path if not a symlink.
+pub(super) fn resolve_real_settings_path(path: &Path) -> anyhow::Result<PathBuf> {
+    if path.is_symlink() {
+        resolve_symlink(path)
+    } else {
+        Ok(path.to_path_buf())
+    }
+}
+
+/// Read and parse a settings.json file, creating an empty object for missing or empty files.
+///
+/// Rejects files larger than [`super::state::MAX_SETTINGS_SIZE`] to prevent OOM.
+pub(super) fn load_or_create_settings(path: &Path) -> anyhow::Result<serde_json::Value> {
+    if !path.exists() {
+        return Ok(serde_json::Value::Object(serde_json::Map::new()));
+    }
+
+    let file_size = std::fs::metadata(path)?.len();
+    if file_size > super::state::MAX_SETTINGS_SIZE {
+        anyhow::bail!(
+            "settings.json is too large ({} bytes, max {} bytes): {}\n\
+             hint: This does not look like a valid Claude Code settings file",
+            file_size,
+            super::state::MAX_SETTINGS_SIZE,
+            path.display()
+        );
+    }
+
+    let contents = std::fs::read_to_string(path)?;
+    if contents.trim().is_empty() {
+        return Ok(serde_json::Value::Object(serde_json::Map::new()));
+    }
+
+    serde_json::from_str(&contents).map_err(|e| {
+        anyhow::anyhow!(
+            "Failed to parse {}: {}\n\
+             hint: Fix the JSON manually, then re-run `skim init`",
+            path.display(),
+            e
+        )
+    })
+}
+
+/// Atomically write settings JSON to disk using tmp+rename.
+pub(super) fn atomic_write_settings(
+    settings: &serde_json::Value,
+    path: &Path,
+) -> anyhow::Result<()> {
+    let pretty = serde_json::to_string_pretty(settings)?;
+    let tmp_path = path.with_extension("json.tmp");
+    std::fs::write(&tmp_path, format!("{pretty}\n"))?;
+    std::fs::rename(&tmp_path, path)?;
+    Ok(())
+}
+
+// ============================================================================
+// Interactive prompt helpers
+// ============================================================================
+
+pub(super) fn prompt_choice(prompt: &str, default: u32, valid: &[u32]) -> anyhow::Result<u32> {
+    print!("{prompt}");
+    io::stdout().flush()?;
+    let mut input = String::new();
+    io::stdin().read_line(&mut input)?;
+    let trimmed = input.trim();
+    if trimmed.is_empty() {
+        return Ok(default);
+    }
+    match trimmed.parse::<u32>() {
+        Ok(n) if valid.contains(&n) => Ok(n),
+        _ => Ok(default),
+    }
+}
+
+/// Prompt the user with "Proceed? [Y/n]" and return `true` if confirmed.
+pub(super) fn confirm_proceed() -> anyhow::Result<bool> {
+    print!(" ? Proceed? [Y/n] ");
+    io::stdout().flush()?;
+    let mut input = String::new();
+    io::stdin().read_line(&mut input)?;
+    let trimmed = input.trim().to_lowercase();
+    let confirmed = trimmed.is_empty() || trimmed == "y" || trimmed == "yes";
+    if confirmed {
+        println!();
+    }
+    Ok(confirmed)
+}
+
+pub(super) fn check_mark(ok: bool) -> &'static str {
+    if ok {
+        "\x1b[32m+\x1b[0m"
+    } else {
+        "\x1b[31m-\x1b[0m"
+    }
+}
+
+// ============================================================================
+// Help text
+// ============================================================================
+
+pub(super) fn print_help() {
+    println!("skim init");
+    println!();
+    println!(" Install skim as an agent hook for automatic command rewriting");
+    println!();
+    println!("Usage: skim init [OPTIONS]");
+    println!();
+    println!("Options:");
+    println!(" --global Install to user-level config directory (default)");
+    println!(" --project Install to project-level config directory");
+    println!(" --agent Target agent (default: claude-code)");
+    println!(
+        " Supported: claude-code, cursor, gemini, copilot, codex, opencode"
+    );
+    println!(" --yes, -y Non-interactive mode (skip prompts)");
+    println!(" --dry-run Print actions without writing");
+    println!(" --uninstall Remove hook and clean up");
+    println!(" --force Force uninstall even if hook script was modified");
+    println!(" --help, -h Print help information");
+    println!();
+    println!("Examples:");
+    println!(" skim init Interactive Claude Code setup (recommended)");
+    println!(" skim init --yes Non-interactive with defaults");
+    println!(" skim init --agent cursor --yes Install for Cursor");
+    println!(" skim init --agent gemini --yes Install for Gemini CLI");
+    println!(" skim init --project --yes Install project-level hook");
+    println!(" skim init --uninstall Remove skim hook");
+    println!(" skim init --dry-run Preview actions without writing");
+}
+
+// ============================================================================
+// Unit tests
+// ============================================================================
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_load_or_create_settings_missing_file() {
+        let dir = tempfile::TempDir::new().unwrap();
+        let path = dir.path().join("does-not-exist.json");
+        let result = load_or_create_settings(&path).unwrap();
+        assert!(result.is_object());
+        assert!(result.as_object().unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_load_or_create_settings_empty_file() {
+        let dir = tempfile::TempDir::new().unwrap();
+        let path = dir.path().join("settings.json");
+        std::fs::write(&path, " \n").unwrap();
+        let result = load_or_create_settings(&path).unwrap();
+        assert!(result.is_object());
+        assert!(result.as_object().unwrap().is_empty());
+    }
+}
diff --git a/crates/rskim/src/cmd/init/install.rs b/crates/rskim/src/cmd/init/install.rs
new file mode 100644
index 0000000..ce21226
--- /dev/null
+++ b/crates/rskim/src/cmd/init/install.rs
@@ -0,0 +1,613 @@
+//! Install flow for `skim init`.
+
+#[cfg(unix)]
+use std::os::unix::fs::PermissionsExt;
+
+use super::flags::InitFlags;
+use super::helpers::{
+    check_mark, confirm_proceed, prompt_choice, HOOK_SCRIPT_NAME, SETTINGS_BACKUP, SETTINGS_FILE,
+};
+use super::state::{detect_state, has_skim_hook_entry, DetectedState};
+
+/// Resolved install options from interactive prompts or --yes defaults.
+struct InstallOptions {
+    /// Whether to use project scope (overrides flags.project when user selects it interactively).
+    project: bool,
+    /// Whether to install the marketplace entry.
+    install_marketplace: bool,
+    /// Whether confirmation was already handled by the prompting phase.
+    skip_confirmation: bool,
+}
+
+/// Prompt the user for install options (scope and marketplace).
+///
+/// In non-interactive mode (--yes), returns defaults immediately.
+/// If the user chose project scope interactively, the caller must re-detect state with the returned scope.
+fn prompt_install_options(
+    flags: &InitFlags,
+    state: &DetectedState,
+) -> anyhow::Result<InstallOptions> {
+    if flags.yes {
+        return Ok(InstallOptions {
+            project: flags.project,
+            install_marketplace: true,
+            skip_confirmation: true,
+        });
+    }
+
+    let mut use_project = flags.project;
+    let mut skip_confirmation = false;
+
+    // Scope prompt (informational -- scope is already determined by --project flag)
+    if !flags.project {
+        println!(" ? Where should skim install the hook?");
+        println!(" [1] Global (~/.claude/settings.json) [recommended]");
+        println!(" [2] Project (.claude/settings.json)");
+        let choice = prompt_choice(" Choice [1]: ", 1, &[1, 2])?;
+        if choice == 2 {
+            println!();
+            println!(" Tip: use `skim init --project` to skip this prompt next time.");
+            use_project = true;
+            // User already made a deliberate scope choice -- skip confirmation later
+            skip_confirmation = true;
+        }
+        println!();
+    }
+
+    // Plugin prompt
+    let install_marketplace = if !state.marketplace_installed {
+        println!(" ? Install the Skimmer plugin? (codebase orientation agent)");
+        println!(" Adds /skim command and auto-orientation for new codebases");
+        println!(" [1] Yes [recommended]");
+        println!(" [2] No");
+        let choice = prompt_choice(" Choice [1]: ", 1, &[1, 2])?;
+        println!();
+        choice == 1
+    } else {
+        true
+    };
+
+    Ok(InstallOptions {
+        project: use_project,
+        install_marketplace,
+        skip_confirmation,
+    })
+}
+
+/// Verify that the target agent appears to be installed on this system.
+///
+/// Checks for the expected config directory. If the agent's config dir
+/// doesn't exist, returns an error with a helpful message rather than
+/// silently creating an orphan config.
+fn verify_agent_installed(state: &DetectedState, flags: &InitFlags) -> anyhow::Result<()> { + use crate::cmd::session::AgentKind; + + // Claude Code: always proceed (we create ~/.claude/ if needed) + if flags.agent == AgentKind::ClaudeCode { + return Ok(()); + } + + // For --project mode, we always create the dir, so skip the check + if flags.project { + return Ok(()); + } + + // Check if the config dir exists (or a parent indicator) + if !state.config_dir.exists() { + let hint = match flags.agent { + AgentKind::Cursor => "Install Cursor from https://cursor.com", + AgentKind::GeminiCli => "Install Gemini CLI: npm install -g @google/gemini-cli", + AgentKind::CopilotCli => { + "Install GitHub Copilot CLI: gh extension install github/gh-copilot" + } + AgentKind::CodexCli => "Install Codex CLI: npm install -g @openai/codex", + AgentKind::OpenCode => { + "Install OpenCode: go install github.com/opencode-ai/opencode@latest" + } + AgentKind::ClaudeCode => unreachable!("handled above"), + }; + anyhow::bail!( + "{} does not appear to be installed (config dir not found: {})\nhint: {}", + flags.agent.display_name(), + state.config_dir.display(), + hint + ); + } + + Ok(()) +} + +pub(super) fn run_install(flags: &InitFlags) -> anyhow::Result { + let state = detect_state(flags)?; + + // Verify agent is installed before proceeding + verify_agent_installed(&state, flags)?; + + // Print header + println!(); + println!( + " skim init -- {} integration setup", + flags.agent.display_name() + ); + println!(); + + // Print detected state + print_detected_state(&state); + + // Plugin collision warning: other Bash PreToolUse hooks exist + if !state.existing_bash_hooks.is_empty() { + println!(" WARNING: Other Bash PreToolUse hooks detected:"); + for hook_cmd in &state.existing_bash_hooks { + println!(" - {hook_cmd}"); + } + println!(" Both hooks will fire on Bash commands. 
This is usually harmless"); + println!(" but may cause unexpected behavior if the other hook also modifies commands."); + println!(); + } + + // Already up to date check + if state.hook_installed + && state.hook_version.as_deref() == Some(&state.skim_version) + && state.marketplace_installed + { + println!(" Already up to date. Nothing to do."); + println!(); + return Ok(std::process::ExitCode::SUCCESS); + } + + // Dual-scope warning + if let Some(ref warning) = state.dual_scope_warning { + println!(" WARNING: {warning}"); + println!(); + } + + // Prompt for options (or use defaults for --yes) + let options = prompt_install_options(flags, &state)?; + + // Re-detect state with the resolved scope (may differ from flags if user + // changed scope interactively) + let flags_override = InitFlags { + project: options.project, + yes: flags.yes, + dry_run: flags.dry_run, + uninstall: false, + force: flags.force, + agent: flags.agent, + }; + let state = detect_state(&flags_override)?; + + // Print summary + let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME); + println!(" Summary:"); + if !state.hook_installed || state.hook_version.as_deref() != Some(&state.skim_version) { + println!(" * Create hook script: {}", hook_script_path.display()); + println!( + " * Patch settings: {} (add PreToolUse hook)", + state.settings_path.display() + ); + } + if options.install_marketplace && !state.marketplace_installed { + println!(" * Register marketplace: skim (dean0x/skim)"); + } + println!(); + + // Confirmation (skip if user already confirmed via scope change or --yes) + if !flags.yes && !options.skip_confirmation && !confirm_proceed()? 
{ + println!(" Cancelled."); + return Ok(std::process::ExitCode::SUCCESS); + } + + if flags_override.dry_run { + print_dry_run_actions(&state, options.install_marketplace); + return Ok(std::process::ExitCode::SUCCESS); + } + + // Execute installation + execute_install(&state, options.install_marketplace)?; + + println!(); + println!( + " Done! skim is now active in {}.", + flags_override.agent.display_name() + ); + println!(); + if options.install_marketplace { + println!( + " Next step -- install the Skimmer plugin in {}:", + flags_override.agent.display_name() + ); + println!(" /install skimmer@skim"); + println!(); + } + + Ok(std::process::ExitCode::SUCCESS) +} + +/// Print the detected state summary to stdout. +pub(super) fn print_detected_state(state: &DetectedState) { + println!(" Checking current state..."); + println!( + " {} skim binary: {} (v{})", + check_mark(true), + state.skim_binary.display(), + state.skim_version + ); + + let config_label = if state.settings_exists { + "exists" + } else { + "will be created" + }; + println!( + " {} Config: {} ({})", + check_mark(state.settings_exists), + state.settings_path.display(), + config_label + ); + + let hook_label = if state.hook_installed { + match &state.hook_version { + Some(v) if v == &state.skim_version => format!("installed (v{v})"), + Some(v) => format!("installed (v{v} -> v{} available)", state.skim_version), + None => "installed".to_string(), + } + } else { + "not installed".to_string() + }; + println!( + " {} Hook: {}", + check_mark(state.hook_installed), + hook_label + ); + println!(); +} + +fn execute_install(state: &DetectedState, install_marketplace: bool) -> anyhow::Result<()> { + // B7: Create hook script + create_hook_script(state)?; + + // B8: Patch settings.json + patch_settings(state, install_marketplace)?; + + Ok(()) +} + +// ============================================================================ +// Hook script generation (B7) +// 
// ============================================================================

/// Validate that a path is safe to interpolate into a double-quoted bash string.
///
/// Rejects characters that can escape double-quote context or inject commands:
/// - `"` (closes the quote)
/// - `` ` `` (command substitution)
/// - `$` (variable/command expansion)
/// - `\` (escape sequences)
/// - newline / null byte (command injection)
///
/// Paths from `current_exe()` on any mainstream OS should never contain these,
/// so this guard only fires on adversarial or corrupted environments.
fn validate_shell_safe_path(path: &str) -> anyhow::Result<()> {
    const UNSAFE_CHARS: &[char] = &['"', '`', '$', '\\', '\n', '\0'];
    if let Some(bad) = path.chars().find(|c| UNSAFE_CHARS.contains(c)) {
        anyhow::bail!(
            "binary path contains shell-unsafe character {:?}: {}\n\
             hint: reinstall skim to a path without special characters",
            bad,
            path
        );
    }
    Ok(())
}

/// Generate the hook script at `{config_dir}/hooks/skim-rewrite.sh`.
///
/// Idempotent: if the existing script already carries the current version
/// marker, nothing is written. The write itself is atomic (tmp file + rename)
/// and the resulting file is hashed for tamper detection (#57).
fn create_hook_script(state: &DetectedState) -> anyhow::Result<()> {
    let hooks_dir = state.config_dir.join("hooks");
    let script_path = hooks_dir.join(HOOK_SCRIPT_NAME);

    // Create hooks directory if needed
    if !hooks_dir.exists() {
        std::fs::create_dir_all(&hooks_dir)?;
        #[cfg(unix)]
        {
            let perms = std::fs::Permissions::from_mode(0o755);
            std::fs::set_permissions(&hooks_dir, perms)?;
        }
    }

    // Check if existing script has same version (idempotent)
    if script_path.exists() {
        if let Ok(contents) = std::fs::read_to_string(&script_path) {
            let version_line = format!("# skim-hook v{}", state.skim_version);
            if contents.contains(&version_line) {
                println!(
                    " {} Skipped: {} (already v{})",
                    check_mark(true),
                    script_path.display(),
                    state.skim_version
                );
                return Ok(());
            }
            // Different version — will overwrite
            // NOTE(review): this status line prints before the write below
            // succeeds; on a write failure the output will have claimed an
            // update that did not happen.
            if let Some(old_ver) = &state.hook_version {
                println!(
                    " {} Updated: {} (v{} -> v{})",
                    check_mark(true),
                    script_path.display(),
                    old_ver,
                    state.skim_version
                );
            } else {
                println!(" {} Updated: {}", check_mark(true), script_path.display());
            }
        }
    } else {
        println!(" {} Created: {}", check_mark(true), script_path.display());
    }

    // Generate script content
    // Binary path is quoted to handle spaces, but we must also reject
    // characters that can escape double-quote context in bash.
    let binary_path = state.skim_binary.display().to_string();
    validate_shell_safe_path(&binary_path)?;

    // claude-code is the default agent and needs no flag; other agents are
    // passed explicitly to `skim rewrite`.
    let agent_flag = if state.agent_cli_name == "claude-code" {
        String::new()
    } else {
        format!(" --agent {}", state.agent_cli_name)
    };
    let script_content = format!(
        "#!/usr/bin/env bash\n\
         # skim-hook v{version}\n\
         # Generated by: skim init -- do not edit manually\n\
         export SKIM_HOOK_VERSION=\"{version}\"\n\
         exec \"{binary_path}\" rewrite --hook{agent_flag}\n",
        version = state.skim_version,
    );

    // Atomic write: write to tmp, then rename to final path.
    // A crash mid-write produces a tmp file instead of a truncated script.
    let tmp_path = hooks_dir.join(format!("{HOOK_SCRIPT_NAME}.tmp"));
    std::fs::write(&tmp_path, script_content)?;

    // Set executable permissions on the tmp file before renaming
    #[cfg(unix)]
    {
        let perms = std::fs::Permissions::from_mode(0o755);
        std::fs::set_permissions(&tmp_path, perms)?;
    }

    std::fs::rename(&tmp_path, &script_path)?;

    // Compute and store SHA-256 hash for integrity verification (#57).
    // Best-effort: a failed hash write must not fail the install.
    if let Ok(hash) = crate::cmd::integrity::compute_file_hash(&script_path) {
        let _ = crate::cmd::integrity::write_hash_manifest(
            &state.config_dir,
            state.agent_cli_name,
            HOOK_SCRIPT_NAME,
            &hash,
        );
    }

    Ok(())
}

// ============================================================================
// Settings.json patching (B8)
// ============================================================================

use super::helpers::{atomic_write_settings, load_or_create_settings, resolve_real_settings_path};

/// Back up the settings file before modification.
///
/// Re-checks that `real_path` is not a symlink immediately before copying to
/// close the TOCTOU window between `resolve_real_settings_path()` and the
/// actual I/O. Without this guard, an attacker could replace the file with a
/// symlink after resolution, causing `fs::copy` to overwrite an arbitrary
/// target.
fn backup_settings(
    config_dir: &std::path::Path,
    real_path: &std::path::Path,
) -> anyhow::Result<()> {
    // Guard: reject if the path became a symlink since resolution
    if real_path.is_symlink() {
        anyhow::bail!(
            "settings path became a symlink after resolution: {}\n\
             hint: this may indicate a symlink race; please verify the path manually",
            real_path.display()
        );
    }
    let backup_path = config_dir.join(SETTINGS_BACKUP);
    std::fs::copy(real_path, &backup_path)?;
    Ok(())
}

/// Insert or update the skim hook entry in `hooks.PreToolUse`.
///
/// Creates the `hooks` object and `PreToolUse` array if missing. Any existing
/// skim entries are removed first, so calling this repeatedly is idempotent
/// and never produces duplicates. Errors if an existing key has the wrong
/// JSON type (e.g. `hooks` is a string).
fn upsert_hook_entry(
    settings: &mut serde_json::Value,
    hook_script_path: &str,
) -> anyhow::Result<()> {
    let obj = settings
        .as_object_mut()
        .ok_or_else(|| anyhow::anyhow!("settings.json root is not an object"))?;

    let hooks = obj
        .entry("hooks")
        .or_insert_with(|| serde_json::Value::Object(serde_json::Map::new()))
        .as_object_mut()
        .ok_or_else(|| anyhow::anyhow!("settings.json 'hooks' is not an object"))?;

    let pre_tool_use = hooks
        .entry("PreToolUse")
        .or_insert_with(|| serde_json::Value::Array(Vec::new()))
        .as_array_mut()
        .ok_or_else(|| anyhow::anyhow!("settings.json 'hooks.PreToolUse' is not an array"))?;

    // Remove existing skim entry (to update in place)
    pre_tool_use.retain(|entry| !has_skim_hook_entry(entry));

    // Insert new entry. Matcher "Bash" + a 5s timeout on the rewrite command.
    pre_tool_use.push(serde_json::json!({
        "matcher": "Bash",
        "hooks": [{
            "type": "command",
            "command": hook_script_path,
            "timeout": 5
        }]
    }));

    Ok(())
}

/// Patch settings.json: back it up, upsert the hook entry, optionally register
/// the skim marketplace, then write atomically.
fn patch_settings(state: &DetectedState, install_marketplace: bool) -> anyhow::Result<()> {
    // Ensure config dir exists
    if
!state.config_dir.exists() {
        std::fs::create_dir_all(&state.config_dir)?;
    }

    // Resolve symlinks once, up front; all subsequent I/O uses the real path.
    let real_path = resolve_real_settings_path(&state.settings_path)?;
    let mut settings = load_or_create_settings(&real_path)?;

    // Back up existing file (re-check existence to avoid TOCTOU race)
    if real_path.exists() {
        backup_settings(&state.config_dir, &real_path)?;
        println!(
            " {} Backed up: {} -> {}",
            check_mark(true),
            state.settings_path.display(),
            SETTINGS_BACKUP
        );
    }

    // Upsert hook entry
    let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME);
    upsert_hook_entry(&mut settings, &hook_script_path.display().to_string())?;

    // Add marketplace (if opted in)
    if install_marketplace {
        let obj = settings
            .as_object_mut()
            .ok_or_else(|| anyhow::anyhow!("settings.json root is not an object"))?;

        let marketplaces = obj
            .entry("extraKnownMarketplaces")
            .or_insert_with(|| serde_json::Value::Object(serde_json::Map::new()))
            .as_object_mut()
            .ok_or_else(|| {
                anyhow::anyhow!("settings.json 'extraKnownMarketplaces' is not an object")
            })?;

        // Overwrites any existing "skim" key, keeping the source pinned to the
        // official repo.
        marketplaces.insert(
            "skim".to_string(),
            serde_json::json!({"source": {"source": "github", "repo": "dean0x/skim"}}),
        );
    }

    atomic_write_settings(&settings, &real_path)?;

    println!(
        " {} Patched: {} (PreToolUse hook added)",
        check_mark(true),
        state.settings_path.display()
    );

    if install_marketplace {
        println!(
            " {} Registered: skim marketplace in {}",
            check_mark(true),
            SETTINGS_FILE
        );
    }

    Ok(())
}

// ============================================================================
// Dry-run output (B11)
// ============================================================================

/// Print the actions `execute_install` would take, without performing any of
/// them. Mirrors the real install flow line-for-line.
pub(super) fn print_dry_run_actions(state: &DetectedState, install_marketplace: bool) {
    let hook_script_path = state.config_dir.join("hooks").join(HOOK_SCRIPT_NAME);

    println!(" [dry-run] Would create: {}", hook_script_path.display());
    if
state.settings_exists {
        println!(
            " [dry-run] Would back up: {} -> {}",
            state.settings_path.display(),
            SETTINGS_BACKUP
        );
    }
    println!(
        " [dry-run] Would patch: {} (add PreToolUse hook)",
        state.settings_path.display()
    );
    if install_marketplace {
        println!(
            " [dry-run] Would register: skim marketplace in {}",
            SETTINGS_FILE
        );
    }
}

// ============================================================================
// Unit tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_upsert_hook_entry_idempotent() {
        // Upserting twice into an empty settings object must not duplicate
        // the PreToolUse entry.
        let mut settings = serde_json::json!({});
        upsert_hook_entry(&mut settings, "/path/to/skim-rewrite.sh").unwrap();
        upsert_hook_entry(&mut settings, "/path/to/skim-rewrite.sh").unwrap();

        let entries = settings["hooks"]["PreToolUse"].as_array().unwrap();
        assert_eq!(
            entries.len(),
            1,
            "running upsert twice should produce exactly one entry, not a duplicate"
        );
    }

    // ---- Shell-safe path validation (SEC-1) ----

    #[test]
    fn test_validate_shell_safe_path_normal_paths() {
        // Spaces are fine: the path is interpolated inside double quotes.
        assert!(validate_shell_safe_path("/usr/local/bin/skim").is_ok());
        assert!(validate_shell_safe_path("/home/user/.cargo/bin/skim").is_ok());
        assert!(validate_shell_safe_path("/path/with spaces/skim").is_ok());
    }

    #[test]
    fn test_validate_shell_safe_path_rejects_double_quote() {
        let result = validate_shell_safe_path("/path/with\"quote/skim");
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("shell-unsafe"));
    }

    #[test]
    fn test_validate_shell_safe_path_rejects_backtick() {
        assert!(validate_shell_safe_path("/path/with`cmd`/skim").is_err());
    }

    #[test]
    fn test_validate_shell_safe_path_rejects_dollar() {
        assert!(validate_shell_safe_path("/path/$HOME/skim").is_err());
    }

    #[test]
    fn test_validate_shell_safe_path_rejects_backslash() {
assert!(validate_shell_safe_path("/path/with\\escape/skim").is_err()); + } + + #[test] + fn test_validate_shell_safe_path_rejects_newline() { + assert!(validate_shell_safe_path("/path/with\nnewline/skim").is_err()); + } + + #[test] + fn test_validate_shell_safe_path_rejects_null_byte() { + assert!(validate_shell_safe_path("/path/with\0null/skim").is_err()); + } +} diff --git a/crates/rskim/src/cmd/init/mod.rs b/crates/rskim/src/cmd/init/mod.rs new file mode 100644 index 0000000..362d19b --- /dev/null +++ b/crates/rskim/src/cmd/init/mod.rs @@ -0,0 +1,116 @@ +//! Interactive hook installation for Claude Code (#44) +//! +//! `skim init` installs skim as a Claude Code PreToolUse hook, enabling +//! automatic command rewriting. Supports global (`~/.claude/`) and project-level +//! (`.claude/`) installation with idempotent, atomic writes. +//! +//! The hook script calls `skim rewrite --hook` which reads Claude Code's +//! PreToolUse JSON, rewrites matched commands, and emits `updatedInput`. +//! +//! SECURITY INVARIANT (Claude Code): The Claude Code hook NEVER sets +//! `permissionDecision`. It only emits `updatedInput` inside +//! `hookSpecificOutput` and lets Claude Code's permission system evaluate +//! independently. Other agents have their own required response fields +//! (e.g., Cursor uses `"permission": "allow"`, Gemini CLI uses +//! `"decision": "allow"`) -- see each agent's `format_response()` in +//! `cmd/hooks/` for protocol-specific documentation. + +mod flags; +mod helpers; +mod install; +mod state; +mod uninstall; + +use std::io::IsTerminal; +use std::process::ExitCode; + +use flags::parse_flags; +use helpers::print_help; +use install::run_install; +use uninstall::run_uninstall; + +pub(crate) use helpers::resolve_config_dir_for_agent; +pub(crate) use state::has_skim_hook_entry; +pub(crate) use state::MAX_SETTINGS_SIZE; + +/// Run the `init` subcommand. 
+pub(crate) fn run(args: &[String]) -> anyhow::Result { + // Unix-only guard + if !cfg!(unix) { + anyhow::bail!( + "skim init is only supported on Unix systems (macOS, Linux)\n\ + Windows support is planned for a future release." + ); + } + + // Handle --help / -h + if args.iter().any(|a| matches!(a.as_str(), "--help" | "-h")) { + print_help(); + return Ok(ExitCode::SUCCESS); + } + + // Parse flags + let flags = parse_flags(args)?; + + // Non-TTY detection (B3) + if !flags.yes && !std::io::stdin().is_terminal() { + eprintln!("error: skim init requires an interactive terminal"); + eprintln!("hint: use --yes for non-interactive mode (e.g., CI)"); + return Ok(ExitCode::FAILURE); + } + + if flags.uninstall { + return run_uninstall(&flags); + } + + run_install(&flags) +} + +/// Build the clap `Command` definition for shell completions. +pub(super) fn command() -> clap::Command { + clap::Command::new("init") + .about("Install skim as an agent hook") + .arg( + clap::Arg::new("global") + .long("global") + .action(clap::ArgAction::SetTrue) + .help("Install to user-level config directory (default)"), + ) + .arg( + clap::Arg::new("project") + .long("project") + .action(clap::ArgAction::SetTrue) + .help("Install to project-level config directory"), + ) + .arg( + clap::Arg::new("agent") + .long("agent") + .value_name("NAME") + .help("Target agent (default: claude-code)"), + ) + .arg( + clap::Arg::new("yes") + .long("yes") + .short('y') + .action(clap::ArgAction::SetTrue) + .help("Non-interactive mode (skip prompts)"), + ) + .arg( + clap::Arg::new("dry-run") + .long("dry-run") + .action(clap::ArgAction::SetTrue) + .help("Print actions without writing"), + ) + .arg( + clap::Arg::new("uninstall") + .long("uninstall") + .action(clap::ArgAction::SetTrue) + .help("Remove hook and clean up"), + ) + .arg( + clap::Arg::new("force") + .long("force") + .action(clap::ArgAction::SetTrue) + .help("Force operation (e.g., uninstall tampered hook)"), + ) +} diff --git 
a/crates/rskim/src/cmd/init/state.rs b/crates/rskim/src/cmd/init/state.rs new file mode 100644 index 0000000..7a731f8 --- /dev/null +++ b/crates/rskim/src/cmd/init/state.rs
//! State detection for `skim init` (B5).

use std::path::{Path, PathBuf};

use super::flags::InitFlags;
use super::helpers::{resolve_config_dir_for_agent, HOOK_SCRIPT_NAME, SETTINGS_FILE};

/// Maximum settings.json size we'll read (10 MB). Anything larger is almost
/// certainly not a real Claude Code settings file and could cause OOM.
pub(crate) const MAX_SETTINGS_SIZE: u64 = 10 * 1024 * 1024;

/// Snapshot of the installation state, taken once at the start of the flow.
pub(super) struct DetectedState {
    // Path of the currently running skim executable.
    pub(super) skim_binary: PathBuf,
    // Version string of this build (CARGO_PKG_VERSION).
    pub(super) skim_version: String,
    // Resolved agent config directory for the chosen scope.
    pub(super) config_dir: PathBuf,
    // `{config_dir}/settings.json`.
    pub(super) settings_path: PathBuf,
    pub(super) settings_exists: bool,
    pub(super) hook_installed: bool,
    // Version parsed from the installed hook script, when readable.
    pub(super) hook_version: Option<String>,
    pub(super) marketplace_installed: bool,
    /// If installing to one scope and the other scope also has a hook
    pub(super) dual_scope_warning: Option<String>,
    /// Existing non-skim Bash PreToolUse hooks (plugin collision detection)
    pub(super) existing_bash_hooks: Vec<String>,
    /// CLI name of the target agent (e.g., "claude-code", "cursor") for integrity hashing
    pub(super) agent_cli_name: &'static str,
}

/// Inspect the filesystem and settings.json to build a [`DetectedState`].
///
/// Parses settings.json at most once and reuses the parsed value for hook
/// detection, marketplace detection, and collision scanning.
pub(super) fn detect_state(flags: &InitFlags) -> anyhow::Result<DetectedState> {
    let skim_binary = std::env::current_exe()?;
    let skim_version = env!("CARGO_PKG_VERSION").to_string();
    let config_dir = resolve_config_dir_for_agent(flags.project, flags.agent)?;
    let settings_path = config_dir.join(SETTINGS_FILE);
    let settings_exists = settings_path.exists();

    let mut hook_installed = false;
    let mut hook_version = None;
    let mut marketplace_installed = false;

    let parsed_settings = read_settings_json(&settings_path);
    if let Some(ref json) = parsed_settings {
        if let Some(arr) = json
            .get("hooks")
            .and_then(|h| h.get("PreToolUse"))
            .and_then(|v| v.as_array())
        {
            // If multiple skim entries exist, the last one wins for version
            // reporting.
            for entry in arr {
                if has_skim_hook_entry(entry) {
                    hook_installed = true;
                    hook_version = extract_hook_version_from_entry(entry, &config_dir);
                }
            }
        }
        if json
            .get("extraKnownMarketplaces")
            .and_then(|m| m.get("skim"))
            .is_some()
        {
            marketplace_installed = true;
        }
    }

    // Scan for existing non-skim Bash PreToolUse hooks (plugin collision detection)
    let existing_bash_hooks = scan_existing_bash_hooks(parsed_settings.as_ref());

    // Dual-scope check (B5)
    let dual_scope_warning = check_dual_scope(flags)?;

    Ok(DetectedState {
        skim_binary,
        skim_version,
        config_dir,
        settings_path,
        settings_exists,
        hook_installed,
        hook_version,
        marketplace_installed,
        dual_scope_warning,
        existing_bash_hooks,
        agent_cli_name: flags.agent.cli_name(),
    })
}

/// Scan already-parsed settings JSON for existing non-skim Bash PreToolUse hooks.
///
/// Returns the command strings of any Bash-matcher entries that are NOT skim entries.
/// Used for plugin collision detection -- warns the user if another tool is also
/// intercepting Bash commands.
///
/// Accepts `Option<&Value>` so callers can reuse an already-parsed settings file
/// instead of re-reading from disk.
+fn scan_existing_bash_hooks(parsed: Option<&serde_json::Value>) -> Vec { + let json = match parsed { + Some(j) => j, + None => return Vec::new(), + }; + + let entries = match json + .get("hooks") + .and_then(|h| h.get("PreToolUse")) + .and_then(|ptu| ptu.as_array()) + { + Some(arr) => arr, + None => return Vec::new(), + }; + + let mut other_hooks = Vec::new(); + for entry in entries { + // Only care about "Bash" matcher entries + let is_bash_matcher = entry + .get("matcher") + .and_then(|m| m.as_str()) + .is_some_and(|m| m == "Bash"); + if !is_bash_matcher { + continue; + } + // Skip skim entries + if has_skim_hook_entry(entry) { + continue; + } + // Extract command strings for reporting + if let Some(hooks) = entry.get("hooks").and_then(|h| h.as_array()) { + for hook in hooks { + if let Some(cmd) = hook.get("command").and_then(|c| c.as_str()) { + other_hooks.push(cmd.to_string()); + } + } + } + } + + other_hooks +} + +pub(super) fn check_dual_scope(flags: &InitFlags) -> anyhow::Result> { + let other_dir = if flags.project { + // Installing project-level, check global + resolve_config_dir_for_agent(false, flags.agent)? + } else { + // Installing global, check project + match resolve_config_dir_for_agent(true, flags.agent) { + Ok(dir) => dir, + Err(_) => return Ok(None), + } + }; + + let other_settings = other_dir.join(SETTINGS_FILE); + let has_hook = read_settings_json(&other_settings) + .and_then(|json| { + json.get("hooks")? + .get("PreToolUse")? 
+ .as_array() + .map(|arr| arr.iter().any(has_skim_hook_entry)) + }) + .unwrap_or(false); + + if !has_hook { + return Ok(None); + } + + let scope = if flags.project { + "globally" + } else { + "in project" + }; + let uninstall_scope = if flags.project { + "--global" + } else { + "--project" + }; + let path = other_settings.display(); + Ok(Some(format!( + "skim hook is also installed {scope} ({path})\n \ + Both hooks will fire, but this is harmless -- the second is a no-op.\n \ + To remove: skim init {uninstall_scope} --uninstall" + ))) +} + +/// Read and parse a settings.json file, returning `None` on any failure. +/// +/// Rejects files larger than [`MAX_SETTINGS_SIZE`] to prevent OOM from +/// maliciously crafted settings files (especially in `--project` mode where +/// the file is under repository control). +pub(super) fn read_settings_json(path: &Path) -> Option { + let metadata = std::fs::metadata(path).ok()?; + if metadata.len() > MAX_SETTINGS_SIZE { + return None; + } + let contents = std::fs::read_to_string(path).ok()?; + serde_json::from_str(&contents).ok() +} + +/// Check if a PreToolUse entry contains a skim hook (substring match on "skim-rewrite"). +pub(crate) fn has_skim_hook_entry(entry: &serde_json::Value) -> bool { + entry + .get("hooks") + .and_then(|h| h.as_array()) + .is_some_and(|hooks| { + hooks.iter().any(|hook| { + hook.get("command") + .and_then(|c| c.as_str()) + .is_some_and(|cmd| cmd.contains("skim-rewrite")) + }) + }) +} + +/// Try to extract the skim version from the hook script referenced in a settings entry. +/// +/// SECURITY: Validates that the resolved script path is within the expected +/// `{config_dir}/hooks/` directory to prevent arbitrary file reads via +/// attacker-controlled settings.json in `--project` mode. 
+pub(super) fn extract_hook_version_from_entry( + entry: &serde_json::Value, + config_dir: &Path, +) -> Option { + let hooks_dir = config_dir.join("hooks"); + let hooks = entry.get("hooks")?.as_array()?; + for hook in hooks { + let cmd = hook.get("command")?.as_str()?; + if cmd.contains("skim-rewrite") { + // Try reading the script file + let script_path = if cmd.starts_with('/') || cmd.starts_with('.') { + PathBuf::from(cmd) + } else { + hooks_dir.join(HOOK_SCRIPT_NAME) + }; + + // Validate the resolved path is within the expected hooks directory. + // canonicalize() resolves symlinks and ".." to get the real path. + let canonical = std::fs::canonicalize(&script_path).ok()?; + let canonical_hooks_dir = std::fs::canonicalize(&hooks_dir).ok()?; + if !canonical.starts_with(&canonical_hooks_dir) { + // Path escapes the hooks directory -- skip version extraction. + return None; + } + + if let Ok(contents) = std::fs::read_to_string(&canonical) { + for line in contents.lines() { + if let Some(ver) = line.strip_prefix("# skim-hook v").or_else(|| { + line.strip_prefix("export SKIM_HOOK_VERSION=\"") + .and_then(|s| s.strip_suffix('"')) + }) { + return Some(ver.to_string()); + } + } + } + } + } + None +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_scan_existing_bash_hooks_none_input() { + // No parsed settings at all + let result = scan_existing_bash_hooks(None); + assert!(result.is_empty()); + } + + #[test] + fn test_scan_existing_bash_hooks_no_other_hooks() { + // Only skim hook + let settings = serde_json::json!({ + "hooks": { + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{"type": "command", "command": "/home/.claude/hooks/skim-rewrite.sh"}] + }] + } + }); + + let result = scan_existing_bash_hooks(Some(&settings)); + assert!(result.is_empty(), "skim entries should be 
excluded");
    }

    #[test]
    fn test_scan_existing_bash_hooks_detects_other_bash_hook() {
        // Settings with both skim and another Bash hook: only the foreign
        // hook's command string must be reported.
        let settings = serde_json::json!({
            "hooks": {
                "PreToolUse": [
                    {
                        "matcher": "Bash",
                        "hooks": [{"type": "command", "command": "/home/.claude/hooks/skim-rewrite.sh"}]
                    },
                    {
                        "matcher": "Bash",
                        "hooks": [{"type": "command", "command": "/usr/bin/other-security-hook"}]
                    }
                ]
            }
        });

        let result = scan_existing_bash_hooks(Some(&settings));
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], "/usr/bin/other-security-hook");
    }

    #[test]
    fn test_scan_existing_bash_hooks_ignores_non_bash_matchers() {
        // A non-Bash matcher should be ignored
        let settings = serde_json::json!({
            "hooks": {
                "PreToolUse": [{
                    "matcher": "Edit",
                    "hooks": [{"type": "command", "command": "/usr/bin/some-hook"}]
                }]
            }
        });

        let result = scan_existing_bash_hooks(Some(&settings));
        assert!(result.is_empty(), "non-Bash matchers should be ignored");
    }
}
diff --git a/crates/rskim/src/cmd/init/uninstall.rs b/crates/rskim/src/cmd/init/uninstall.rs new file mode 100644 index 0000000..1c8997d --- /dev/null +++ b/crates/rskim/src/cmd/init/uninstall.rs
//! Uninstall flow for `skim init` (B10).

use super::flags::InitFlags;
use super::helpers::{
    atomic_write_settings, check_mark, confirm_proceed, load_or_create_settings,
    resolve_config_dir_for_agent, resolve_real_settings_path, HOOK_SCRIPT_NAME, SETTINGS_FILE,
};
use super::state::{has_skim_hook_entry, read_settings_json};

/// Remove skim hook entries and marketplace registration from a settings.json value.
///
/// 1. Removes skim entries from `hooks.PreToolUse` array
/// 2. Cleans up empty arrays/objects
/// 3. 
Removes `skim` from `extraKnownMarketplaces` +fn remove_skim_from_settings(settings: &mut serde_json::Value) { + let obj = match settings.as_object_mut() { + Some(obj) => obj, + None => return, + }; + + // Remove skim from PreToolUse + let hooks_empty = obj + .get_mut("hooks") + .and_then(|h| h.as_object_mut()) + .map(|hooks_obj| { + let ptu_empty = hooks_obj + .get_mut("PreToolUse") + .and_then(|ptu| ptu.as_array_mut()) + .map(|arr| { + arr.retain(|entry| !has_skim_hook_entry(entry)); + arr.is_empty() + }) + .unwrap_or(false); + if ptu_empty { + hooks_obj.remove("PreToolUse"); + } + hooks_obj.is_empty() + }) + .unwrap_or(false); + if hooks_empty { + obj.remove("hooks"); + } + + // Remove from extraKnownMarketplaces + let mkts_empty = obj + .get_mut("extraKnownMarketplaces") + .and_then(|m| m.as_object_mut()) + .map(|mkts_obj| { + mkts_obj.remove("skim"); + mkts_obj.is_empty() + }) + .unwrap_or(false); + if mkts_empty { + obj.remove("extraKnownMarketplaces"); + } +} + +pub(super) fn run_uninstall(flags: &InitFlags) -> anyhow::Result { + let config_dir = resolve_config_dir_for_agent(flags.project, flags.agent)?; + let settings_path = config_dir.join(SETTINGS_FILE); + let hook_script_path = config_dir.join("hooks").join(HOOK_SCRIPT_NAME); + + // Check if anything is installed + let settings_has_hook = read_settings_json(&settings_path) + .and_then(|json| { + json.get("hooks")? + .get("PreToolUse")? + .as_array() + .map(|arr| arr.iter().any(has_skim_hook_entry)) + }) + .unwrap_or(false); + + let script_exists = hook_script_path.exists(); + + if !settings_has_hook && !script_exists { + println!(" skim hook not found. 
Nothing to uninstall.");
        return Ok(std::process::ExitCode::SUCCESS);
    }

    // Integrity check (#57): warn if hook script has been modified since install
    if script_exists {
        if let Ok(false) = crate::cmd::integrity::verify_script_integrity(
            &config_dir,
            flags.agent.cli_name(),
            &hook_script_path,
        ) {
            // Tampered script: refuse to delete it unless --force is given,
            // so the user can inspect what changed first.
            if !flags.force {
                eprintln!("warning: hook script has been modified since installation");
                eprintln!("hint: use --force to uninstall anyway");
                return Ok(std::process::ExitCode::FAILURE);
            }
            // --force provided: proceed despite tamper, but inform user
            eprintln!("warning: hook script has been modified (proceeding with --force)");
        }
    }

    // Interactive confirmation
    if !flags.yes {
        println!();
        println!(" skim init --uninstall");
        println!();
        if settings_has_hook {
            println!(" * Remove hook entry from {}", settings_path.display());
            println!(" * Remove skim from extraKnownMarketplaces");
        }
        if script_exists {
            println!(" * Delete {}", hook_script_path.display());
        }
        println!();
        if !confirm_proceed()? {
            println!(" Cancelled.");
            return Ok(std::process::ExitCode::SUCCESS);
        }
    }

    // Dry-run exits after reporting, before any mutation below.
    if flags.dry_run {
        if settings_has_hook {
            println!(
                " [dry-run] Would remove hook entry from {}",
                settings_path.display()
            );
            println!(" [dry-run] Would remove skim from extraKnownMarketplaces");
        }
        if script_exists {
            println!(" [dry-run] Would delete {}", hook_script_path.display());
        }
        return Ok(std::process::ExitCode::SUCCESS);
    }

    // Remove from settings.json
    if settings_has_hook {
        let real_path = resolve_real_settings_path(&settings_path)?;
        let mut settings = load_or_create_settings(&real_path)?;

        remove_skim_from_settings(&mut settings);

        atomic_write_settings(&settings, &real_path)?;

        println!(
            " {} Removed: hook entry from {}",
            check_mark(true),
            settings_path.display()
        );
    }

    // Delete hook script and hash manifest
    if script_exists {
        std::fs::remove_file(&hook_script_path)?;
        println!(
            " {} Deleted: {}",
            check_mark(true),
            hook_script_path.display()
        );

        // Clean up hash manifest (#57) -- best-effort, failure is ignored.
        let _ = crate::cmd::integrity::remove_hash_manifest(&config_dir, flags.agent.cli_name());
    }

    println!();
    println!(" skim hook has been uninstalled.");
    println!();

    Ok(std::process::ExitCode::SUCCESS)
}
diff --git a/crates/rskim/src/cmd/integrity.rs b/crates/rskim/src/cmd/integrity.rs new file mode 100644 index 0000000..d9c6278 --- /dev/null +++ b/crates/rskim/src/cmd/integrity.rs
//! SHA-256 hook integrity verification (#57).
//!
//! Provides hash-based tamper detection for skim hook scripts. Each agent's
//! hook script gets a companion `.sha256` manifest file stored alongside the
//! hook in `{config_dir}/hooks/`. The manifest format is:
//!
//! ```text
//! sha256:<hash> <script_name>
//! ```
//!
//! Verification follows the behavior matrix:
//! - Hook execution: log-only warnings (NEVER stderr -- GRANITE #361 Bug 3)
//! - Uninstall: stderr warning, require `--force` if tampered
//!
- Install/upgrade: always recompute hash + +use sha2::{Digest, Sha256}; +use std::path::{Path, PathBuf}; + +/// Compute SHA-256 hash of file contents, returning the hex-encoded digest. +pub(crate) fn compute_file_hash(path: &Path) -> anyhow::Result { + let contents = std::fs::read(path)?; + let mut hasher = Sha256::new(); + hasher.update(&contents); + let result = hasher.finalize(); + Ok(format!("{:x}", result)) +} + +/// Write a hash manifest for an agent's hook script. +/// +/// Creates the manifest at `{config_dir}/hooks/skim-{agent_cli_name}.sha256`. +/// The manifest contains a single line: `sha256: \n`. +pub(crate) fn write_hash_manifest( + config_dir: &Path, + agent_cli_name: &str, + script_name: &str, + hash: &str, +) -> anyhow::Result<()> { + let manifest_path = manifest_path(config_dir, agent_cli_name); + let content = format!("sha256:{hash} {script_name}\n"); + // Ensure the hooks directory exists (caller may have already created it, + // but this is idempotent). + if let Some(parent) = manifest_path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(&manifest_path, content)?; + Ok(()) +} + +/// Read hash from manifest file. Returns `None` if the manifest is missing +/// or cannot be parsed. +pub(crate) fn read_hash_manifest(config_dir: &Path, agent_cli_name: &str) -> Option { + let path = manifest_path(config_dir, agent_cli_name); + let content = std::fs::read_to_string(&path).ok()?; + content + .strip_prefix("sha256:") + .and_then(|s| s.split_whitespace().next()) + .map(|s| s.to_string()) +} + +/// Verify script integrity against stored hash. 
+/// +/// Returns: +/// - `Ok(true)` if the hash matches OR if no manifest exists (backward compat) +/// - `Ok(false)` if the stored hash differs from the current file hash (tampered) +/// - `Err` if the script file cannot be read +pub(crate) fn verify_script_integrity( + config_dir: &Path, + agent_cli_name: &str, + script_path: &Path, +) -> anyhow::Result { + let stored_hash = match read_hash_manifest(config_dir, agent_cli_name) { + Some(h) => h, + None => return Ok(true), // Missing hash = backward compat, treat as valid + }; + let current_hash = compute_file_hash(script_path)?; + Ok(stored_hash == current_hash) +} + +/// Delete hash manifest for an agent. No-op if the file does not exist. +pub(crate) fn remove_hash_manifest(config_dir: &Path, agent_cli_name: &str) -> anyhow::Result<()> { + let path = manifest_path(config_dir, agent_cli_name); + if path.exists() { + std::fs::remove_file(&path)?; + } + Ok(()) +} + +/// Write hash manifest for an awareness file. +/// +/// Uses the key pattern `{agent_cli_name}-awareness` to track generated awareness +/// files separately from hook scripts. This enables uninstall to detect user +/// modifications and require `--force` for tampered awareness files. +#[allow(dead_code)] // Used in tests; consumed when init writes awareness files for non-Claude agents +pub(crate) fn write_awareness_hash( + config_dir: &Path, + agent_cli_name: &str, + awareness_path: &Path, +) -> anyhow::Result<()> { + let hash = compute_file_hash(awareness_path)?; + let key = format!("{agent_cli_name}-awareness"); + let file_name = awareness_path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("awareness"); + write_hash_manifest(config_dir, &key, file_name, &hash) +} + +/// Verify integrity of an awareness file against stored hash. +/// +/// Returns `Ok(true)` if valid or no manifest (backward compat), `Ok(false)` if tampered. 
+#[allow(dead_code)] // Used in tests; consumed when uninstall checks awareness file integrity +pub(crate) fn verify_awareness_integrity( + config_dir: &Path, + agent_cli_name: &str, + awareness_path: &Path, +) -> anyhow::Result { + let key = format!("{agent_cli_name}-awareness"); + verify_script_integrity(config_dir, &key, awareness_path) +} + +/// Compute the manifest file path for a given agent. +fn manifest_path(config_dir: &Path, agent_cli_name: &str) -> PathBuf { + config_dir + .join("hooks") + .join(format!("skim-{agent_cli_name}.sha256")) +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_compute_file_hash_deterministic() { + let dir = tempfile::TempDir::new().unwrap(); + let file_path = dir.path().join("test.sh"); + std::fs::write(&file_path, "#!/bin/bash\necho hello\n").unwrap(); + + let hash1 = compute_file_hash(&file_path).unwrap(); + let hash2 = compute_file_hash(&file_path).unwrap(); + + assert_eq!(hash1, hash2, "Same file contents should produce same hash"); + assert_eq!(hash1.len(), 64, "SHA-256 hex digest should be 64 chars"); + // Verify it's valid hex + assert!( + hash1.chars().all(|c| c.is_ascii_hexdigit()), + "Hash should be hex" + ); + } + + #[test] + fn test_compute_file_hash_different_content() { + let dir = tempfile::TempDir::new().unwrap(); + let file1 = dir.path().join("a.sh"); + let file2 = dir.path().join("b.sh"); + std::fs::write(&file1, "content A").unwrap(); + std::fs::write(&file2, "content B").unwrap(); + + let hash1 = compute_file_hash(&file1).unwrap(); + let hash2 = compute_file_hash(&file2).unwrap(); + + assert_ne!( + hash1, hash2, + "Different content should produce different hashes" + ); + } + + #[test] + fn test_write_and_read_hash_manifest() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + 
std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + let hash = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2"; + write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", hash).unwrap(); + + let read_back = read_hash_manifest(config_dir, "claude-code"); + assert_eq!(read_back, Some(hash.to_string())); + + // Verify manifest file content format + let manifest = config_dir.join("hooks/skim-claude-code.sha256"); + let content = std::fs::read_to_string(&manifest).unwrap(); + assert_eq!(content, format!("sha256:{hash} skim-rewrite.sh\n")); + } + + #[test] + fn test_read_hash_manifest_missing() { + let dir = tempfile::TempDir::new().unwrap(); + let result = read_hash_manifest(dir.path(), "nonexistent-agent"); + assert_eq!(result, None); + } + + #[test] + fn test_verify_script_integrity_valid() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + // Create a script file + let script_path = config_dir.join("hooks/skim-rewrite.sh"); + std::fs::write(&script_path, "#!/bin/bash\nexec skim rewrite --hook\n").unwrap(); + + // Compute and store hash + let hash = compute_file_hash(&script_path).unwrap(); + write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", &hash).unwrap(); + + // Verify -- should be valid + let result = verify_script_integrity(config_dir, "claude-code", &script_path).unwrap(); + assert!(result, "Unmodified script should verify as valid"); + } + + #[test] + fn test_verify_script_integrity_tampered() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + // Create a script file and store its hash + let script_path = config_dir.join("hooks/skim-rewrite.sh"); + std::fs::write(&script_path, "#!/bin/bash\nexec skim rewrite --hook\n").unwrap(); + let hash = compute_file_hash(&script_path).unwrap(); + write_hash_manifest(config_dir, 
"claude-code", "skim-rewrite.sh", &hash).unwrap(); + + // Tamper with the script + std::fs::write(&script_path, "#!/bin/bash\nexec malicious-command\n").unwrap(); + + // Verify -- should be tampered + let result = verify_script_integrity(config_dir, "claude-code", &script_path).unwrap(); + assert!(!result, "Modified script should verify as tampered"); + } + + #[test] + fn test_verify_script_integrity_missing_hash_backward_compat() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + // Create a script file but NO hash manifest + let script_path = config_dir.join("hooks/skim-rewrite.sh"); + std::fs::write(&script_path, "#!/bin/bash\nexec skim rewrite --hook\n").unwrap(); + + // Verify -- should treat as valid (backward compat) + let result = verify_script_integrity(config_dir, "claude-code", &script_path).unwrap(); + assert!( + result, + "Missing hash manifest should be treated as valid (backward compat)" + ); + } + + #[test] + fn test_remove_hash_manifest() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + // Create manifest + write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", "abc123").unwrap(); + assert!(config_dir.join("hooks/skim-claude-code.sha256").exists()); + + // Remove it + remove_hash_manifest(config_dir, "claude-code").unwrap(); + assert!(!config_dir.join("hooks/skim-claude-code.sha256").exists()); + } + + #[test] + fn test_remove_hash_manifest_nonexistent_is_noop() { + let dir = tempfile::TempDir::new().unwrap(); + // Should not error when manifest doesn't exist + let result = remove_hash_manifest(dir.path(), "nonexistent"); + assert!(result.is_ok()); + } + + #[test] + fn test_write_hash_manifest_creates_hooks_dir() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + // hooks/ dir does NOT exist yet + + 
write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", "abc123").unwrap(); + assert!(config_dir.join("hooks/skim-claude-code.sha256").exists()); + } + + #[test] + fn test_upgrade_recomputes_hash() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + let script_path = config_dir.join("hooks/skim-rewrite.sh"); + + // Version 1 content + let v1_content = "#!/bin/bash\n# skim-hook v1.0.0\nexec skim rewrite --hook\n"; + std::fs::write(&script_path, v1_content).unwrap(); + let hash_v1 = compute_file_hash(&script_path).unwrap(); + write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", &hash_v1).unwrap(); + + // Simulate upgrade: overwrite with new version + let v2_content = "#!/bin/bash\n# skim-hook v2.0.0\nexec skim rewrite --hook\n"; + std::fs::write(&script_path, v2_content).unwrap(); + + // Old hash should detect tamper + let tampered = verify_script_integrity(config_dir, "claude-code", &script_path).unwrap(); + assert!(!tampered, "Old hash should detect new content"); + + // Recompute hash (simulating what install does on upgrade) + let hash_v2 = compute_file_hash(&script_path).unwrap(); + write_hash_manifest(config_dir, "claude-code", "skim-rewrite.sh", &hash_v2).unwrap(); + + // New hash should verify + let valid = verify_script_integrity(config_dir, "claude-code", &script_path).unwrap(); + assert!(valid, "Recomputed hash should verify after upgrade"); + assert_ne!( + hash_v1, hash_v2, + "Different content should yield different hashes" + ); + } + + #[test] + fn test_manifest_path_per_agent() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + + let path_claude = manifest_path(config_dir, "claude-code"); + let path_cursor = manifest_path(config_dir, "cursor"); + + assert_ne!(path_claude, path_cursor); + assert!(path_claude.ends_with("skim-claude-code.sha256")); + assert!(path_cursor.ends_with("skim-cursor.sha256")); + } + 
+ #[test] + fn test_awareness_hash_round_trip() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + std::fs::create_dir_all(config_dir.join("hooks")).unwrap(); + + // Create a fake awareness file + let awareness_path = config_dir.join("AGENTS.md"); + std::fs::write( + &awareness_path, + "# skim awareness\nGenerated by skim init\n", + ) + .unwrap(); + + // Write awareness hash + write_awareness_hash(config_dir, "opencode", &awareness_path).unwrap(); + + // Verify — should be valid + let valid = verify_awareness_integrity(config_dir, "opencode", &awareness_path).unwrap(); + assert!(valid, "freshly written awareness hash should verify"); + + // Tamper with the awareness file + std::fs::write(&awareness_path, "# modified by user\n").unwrap(); + + // Verify — should be tampered + let valid = verify_awareness_integrity(config_dir, "opencode", &awareness_path).unwrap(); + assert!(!valid, "modified awareness file should fail verification"); + } + + #[test] + fn test_awareness_hash_missing_manifest() { + let dir = tempfile::TempDir::new().unwrap(); + let config_dir = dir.path(); + + let awareness_path = config_dir.join("AGENTS.md"); + std::fs::write(&awareness_path, "# some content\n").unwrap(); + + // No manifest written — should return Ok(true) for backward compat + let valid = verify_awareness_integrity(config_dir, "codex", &awareness_path).unwrap(); + assert!( + valid, + "missing manifest should be treated as valid (backward compat)" + ); + } +} diff --git a/crates/rskim/src/cmd/learn.rs b/crates/rskim/src/cmd/learn.rs index c884532..9f749b9 100644 --- a/crates/rskim/src/cmd/learn.rs +++ b/crates/rskim/src/cmd/learn.rs @@ -2,7 +2,7 @@ //! //! Scans AI agent session files for error-retry patterns: a failed Bash command //! followed by a similar successful command within the next few invocations. -//! Optionally generates a `.claude/rules/cli-corrections.md` rules file. +//! 
Optionally generates an agent-specific rules file (e.g., `.claude/rules/skim-corrections.md`). use std::collections::HashMap; use std::io::{self, Write}; @@ -62,10 +62,13 @@ pub(crate) fn run(args: &[String]) -> anyhow::Result { if config.json_output { print_json_report(&corrections)?; } else if config.generate { - let content = generate_rules_content(&corrections); - write_rules_file(&content, config.dry_run)?; + // Use the agent filter for rules output format, default to ClaudeCode + let rules_agent = config.agent_filter.unwrap_or(AgentKind::ClaudeCode); + let content = generate_rules_content(&corrections, rules_agent); + write_rules_file(&content, rules_agent, config.dry_run)?; } else { - print_text_report(&corrections); + let rules_agent = config.agent_filter.unwrap_or(AgentKind::ClaudeCode); + print_text_report(&corrections, rules_agent); } Ok(ExitCode::SUCCESS) @@ -111,9 +114,7 @@ fn parse_args(args: &[String]) -> anyhow::Result { if i >= args.len() { anyhow::bail!("--agent requires a value (e.g., claude-code)"); } - config.agent_filter = Some(AgentKind::from_str(&args[i]).ok_or_else(|| { - anyhow::anyhow!("unknown agent: '{}'\nSupported: claude-code", &args[i]) - })?); + config.agent_filter = Some(AgentKind::parse_cli_arg(&args[i])?); } other => { anyhow::bail!( @@ -140,6 +141,9 @@ struct CorrectionPair { pattern_type: PatternType, occurrences: usize, sessions: Vec, + /// Which agent produced this correction (for per-agent rules output). + #[allow(dead_code)] // Read in Phase 2 for per-agent filtering + agent: AgentKind, } /// Classification of how the correction differs from the original. 
@@ -196,9 +200,14 @@ fn detect_corrections(bash_invocations: &[&ToolInvocation]) -> Vec continue, }; - if let Some(pair) = - find_correction(bash_invocations, i, failed_cmd, result, &inv.session_id) - { + if let Some(pair) = find_correction( + bash_invocations, + i, + failed_cmd, + result, + &inv.session_id, + inv.agent, + ) { corrections.push(pair); } } @@ -213,6 +222,7 @@ fn find_correction( failed_cmd: &str, error_result: &session::ToolResult, session_id: &str, + agent: AgentKind, ) -> Option { const LOOKAHEAD: usize = 5; let end = (failed_idx + 1 + LOOKAHEAD).min(invocations.len()); @@ -232,10 +242,11 @@ fn find_correction( return Some(CorrectionPair { failed_command: failed_cmd.to_string(), successful_command: candidate_cmd.to_string(), - error_output: error_result.content.chars().take(200).collect(), + error_output: sanitize_error_output(&error_result.content), pattern_type: pattern, occurrences: 1, sessions: vec![session_id.to_string()], + agent, }); } } @@ -386,29 +397,24 @@ fn levenshtein(a: &str, b: &str) -> usize { return len_diff; } - let mut dp = vec![vec![0usize; n + 1]; m + 1]; - - for (i, row) in dp.iter_mut().enumerate().take(m + 1) { - row[0] = i; - } - for (j, val) in dp[0].iter_mut().enumerate().take(n + 1) { - *val = j; - } + // Two-row DP: O(n) space instead of O(m*n). 
+ let mut prev: Vec = (0..=n).collect(); + let mut curr = vec![0usize; n + 1]; for i in 1..=m { + curr[0] = i; for j in 1..=n { let cost = if a_chars[i - 1] == b_chars[j - 1] { 0 } else { 1 }; - dp[i][j] = (dp[i - 1][j] + 1) - .min(dp[i][j - 1] + 1) - .min(dp[i - 1][j - 1] + cost); + curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost); } + std::mem::swap(&mut prev, &mut curr); } - dp[m][n] + prev[n] } // ============================================================================ @@ -437,15 +443,10 @@ fn truncate_utf8(s: &str, max_len: usize) -> &str { fn looks_like_error(content: &str) -> bool { let check_content = truncate_utf8(content, 1024); - let lower = check_content.to_lowercase(); + let lower = check_content.to_ascii_lowercase(); - // Quick exclusion: "0 failed" is a success indicator in test output - let has_failed = if lower.contains("failed") { - // Only count as error if there's a non-zero count before "failed" - !lower.contains("0 failed") - } else { - false - }; + // "0 failed" is a success indicator in test output — exclude it + let has_failed = lower.contains("failed") && !lower.contains("0 failed"); // Use prefix patterns to avoid matching benign occurrences like // "0 errors generated", "error_handler.rs", etc. @@ -463,7 +464,6 @@ fn looks_like_error(content: &str) -> bool { || lower.contains("command not found") || has_failed || lower.starts_with("fatal:") - || (check_content.contains("FAILED") && !lower.contains("0 failed")) || check_content.contains("Exit code") } @@ -591,8 +591,24 @@ fn looks_like_path(s: &str) -> bool { // ============================================================================ /// Generate the rules file content from correction pairs. -fn generate_rules_content(corrections: &[CorrectionPair]) -> String { +/// +/// Adds agent-specific frontmatter for Cursor (.mdc) and Copilot (.instructions.md). 
+fn generate_rules_content(corrections: &[CorrectionPair], agent: AgentKind) -> String { let mut output = String::new(); + + // Agent-specific frontmatter + match agent { + AgentKind::Cursor => { + output.push_str( + "---\nalwaysApply: true\ndescription: CLI corrections learned by skim\n---\n\n", + ); + } + AgentKind::CopilotCli => { + output.push_str("---\napplyTo: \"**/*\"\n---\n\n"); + } + _ => {} + } + output.push_str("# CLI Corrections\n\n"); output .push_str("Generated by `skim learn`. Common CLI mistakes detected in your sessions.\n\n"); @@ -617,44 +633,78 @@ fn generate_rules_content(corrections: &[CorrectionPair]) -> String { output } -/// Sanitize a command string for safe inclusion in a markdown rules file. +/// Sanitize a string for safe inclusion in a markdown rules file. /// /// Prevents prompt injection by: -/// - Truncating to 200 chars (commands longer than this are not useful rules) +/// - Collapsing to single line +/// - Truncating to `max_len` chars (longer strings are not useful in rules) /// - Escaping backticks to prevent breaking out of inline code /// - Stripping markdown heading markers at line start -/// - Collapsing to single line -fn sanitize_command_for_rules(cmd: &str) -> String { - // Collapse to single line, trim whitespace - let single_line: String = cmd +fn sanitize_for_rules(s: &str, max_len: usize) -> String { + let single_line: String = s .chars() .map(|c| if c == '\n' || c == '\r' { ' ' } else { c }) .collect(); let single_line = single_line.trim(); - // Truncate to max length, then escape/strip injection vectors - truncate_utf8(single_line, 200) + truncate_utf8(single_line, max_len) .replace('`', "'") .trim_start_matches('#') .trim_start() .to_string() } -/// Write the rules file to `.claude/rules/cli-corrections.md`. 
-fn write_rules_file(content: &str, dry_run: bool) -> anyhow::Result<()> { - let rules_dir = std::path::Path::new(".claude").join("rules"); - let rules_path = rules_dir.join("cli-corrections.md"); +/// Sanitize error output to prevent data leakage and prompt injection. +fn sanitize_error_output(error: &str) -> String { + sanitize_for_rules(error, 200) +} - if dry_run { - println!("Would write to: {}", rules_path.display()); - println!("---"); - print!("{content}"); - return Ok(()); - } +/// Sanitize a command string for safe inclusion in a markdown rules file. +fn sanitize_command_for_rules(cmd: &str) -> String { + sanitize_for_rules(cmd, 200) +} - std::fs::create_dir_all(&rules_dir)?; - std::fs::write(&rules_path, content)?; - println!("Wrote corrections to: {}", rules_path.display()); +/// Write the rules file to the appropriate agent-specific location. +/// +/// For agents with a rules directory (Claude Code, Cursor, Copilot), +/// creates the file automatically. For single-file agents (Codex, Gemini, +/// OpenCode), prints the content with instructions to paste. 
+fn write_rules_file(content: &str, agent: AgentKind, dry_run: bool) -> anyhow::Result<()> { + match agent.rules_dir() { + Some(dir) => { + // Directory-based agents: auto-create file + let rules_dir = std::path::Path::new(&dir); + let filename = agent.rules_filename(); + let rules_path = rules_dir.join(filename); + + // Migrate legacy filename (cli-corrections.md -> skim-corrections.md) + let legacy_path = rules_dir.join("cli-corrections.md"); + if legacy_path.exists() && !rules_path.exists() { + std::fs::rename(&legacy_path, &rules_path)?; + } + + if dry_run { + println!("Would write to: {}", rules_path.display()); + println!("---"); + print!("{content}"); + return Ok(()); + } + + std::fs::create_dir_all(rules_dir)?; + std::fs::write(&rules_path, content)?; + println!("Wrote corrections to: {}", rules_path.display()); + } + None => { + // Single-file agents: print content with instructions + println!( + "Add the following to your {} configuration:\n", + agent.display_name() + ); + println!("---"); + print!("{content}"); + println!("---"); + } + } Ok(()) } @@ -662,7 +712,7 @@ fn write_rules_file(content: &str, dry_run: bool) -> anyhow::Result<()> { // Output // ============================================================================ -fn print_text_report(corrections: &[CorrectionPair]) { +fn print_text_report(corrections: &[CorrectionPair], agent: AgentKind) { println!( "skim learn -- {} correction{} detected\n", corrections.len(), @@ -689,9 +739,14 @@ fn print_text_report(corrections: &[CorrectionPair]) { println!(); } - println!( - "hint: run `skim learn --generate` to write corrections to .claude/rules/cli-corrections.md" - ); + let target = match agent.rules_dir() { + Some(dir) => std::path::Path::new(&dir) + .join(agent.rules_filename()) + .display() + .to_string(), + None => format!("{} configuration", agent.display_name()), + }; + println!("hint: run `skim learn --generate` to write corrections to {target}"); } fn print_json_report(corrections: 
&[CorrectionPair]) -> anyhow::Result<()> { @@ -729,7 +784,9 @@ fn print_help() { println!(); println!("Options:"); println!(" --since Time window (e.g., 24h, 7d, 1w) [default: 7d]"); - println!(" --generate Write rules to .claude/rules/cli-corrections.md"); + println!(" (7d default provides enough history for"); + println!(" reliable error-pattern detection)"); + println!(" --generate Write rules to agent-specific rules file"); println!(" --dry-run Preview rules without writing (requires --generate)"); println!(" --agent Only scan sessions from a specific agent"); println!(" --json Output machine-readable JSON"); @@ -753,7 +810,7 @@ pub(super) fn command() -> clap::Command { clap::Arg::new("since") .long("since") .value_name("DURATION") - .help("Time window (e.g., 24h, 7d, 1w)"), + .help("Time window (e.g., 24h, 7d, 1w) [default: 7d]"), ) .arg( clap::Arg::new("generate") @@ -1104,6 +1161,7 @@ mod tests { pattern_type: PatternType::FlagTypo, occurrences: 1, sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, }; let pair2 = CorrectionPair { failed_command: "carg test".to_string(), @@ -1112,6 +1170,7 @@ mod tests { pattern_type: PatternType::FlagTypo, occurrences: 1, sessions: vec!["sess2".to_string()], + agent: AgentKind::ClaudeCode, }; let result = deduplicate_and_filter(vec![pair1, pair2]); @@ -1129,6 +1188,7 @@ mod tests { pattern_type: PatternType::MissingArg, occurrences: 1, sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, }; let result = deduplicate_and_filter(vec![pair]); @@ -1147,6 +1207,7 @@ mod tests { pattern_type: PatternType::FlagTypo, occurrences: 1, sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, }; let result = deduplicate_and_filter(vec![pair]); @@ -1162,6 +1223,7 @@ mod tests { pattern_type: PatternType::FlagTypo, occurrences: 1, sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, }; let result = deduplicate_and_filter(vec![pair]); @@ -1179,13 +1241,16 @@ mod tests { 
pattern_type: PatternType::FlagTypo, occurrences: 3, sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, }]; - let content = generate_rules_content(&corrections); + let content = generate_rules_content(&corrections, AgentKind::ClaudeCode); assert!(content.contains("# CLI Corrections")); assert!(content.contains("Typo (seen 3 times)")); assert!(content.contains("Instead of: `carg test`")); assert!(content.contains("Use: `cargo test`")); + // Claude Code: no frontmatter + assert!(!content.starts_with("---")); } // ---- parse_args ---- @@ -1463,4 +1528,59 @@ mod tests { "TDD cycles should not produce corrections" ); } + + // ---- per-agent rules file output ---- + // Note: rules_filename() tests moved to session::types::tests (AgentKind method) + + #[test] + fn test_generate_rules_content_cursor_frontmatter() { + let corrections = vec![CorrectionPair { + failed_command: "carg test".to_string(), + successful_command: "cargo test".to_string(), + error_output: "error".to_string(), + pattern_type: PatternType::FlagTypo, + occurrences: 1, + sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, + }]; + + let content = generate_rules_content(&corrections, AgentKind::Cursor); + assert!(content.starts_with("---\nalwaysApply: true\n")); + assert!(content.contains("description: CLI corrections learned by skim")); + assert!(content.contains("# CLI Corrections")); + } + + #[test] + fn test_generate_rules_content_copilot_frontmatter() { + let corrections = vec![CorrectionPair { + failed_command: "carg test".to_string(), + successful_command: "cargo test".to_string(), + error_output: "error".to_string(), + pattern_type: PatternType::FlagTypo, + occurrences: 1, + sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, + }]; + + let content = generate_rules_content(&corrections, AgentKind::CopilotCli); + assert!(content.starts_with("---\napplyTo:")); + assert!(content.contains("# CLI Corrections")); + } + + #[test] + fn 
test_generate_rules_content_codex_no_frontmatter() { + let corrections = vec![CorrectionPair { + failed_command: "carg test".to_string(), + successful_command: "cargo test".to_string(), + error_output: "error".to_string(), + pattern_type: PatternType::FlagTypo, + occurrences: 1, + sessions: vec!["sess1".to_string()], + agent: AgentKind::ClaudeCode, + }]; + + let content = generate_rules_content(&corrections, AgentKind::CodexCli); + assert!(!content.starts_with("---")); + assert!(content.starts_with("# CLI Corrections")); + } } diff --git a/crates/rskim/src/cmd/mod.rs b/crates/rskim/src/cmd/mod.rs index 8c4cef2..ed5908b 100644 --- a/crates/rskim/src/cmd/mod.rs +++ b/crates/rskim/src/cmd/mod.rs @@ -5,11 +5,15 @@ //! helper functions used by subcommand parsers (arg inspection, flag injection, //! command execution with three-tier parse degradation). +mod agents; mod build; mod completions; mod discover; mod git; +mod hook_log; +mod hooks; mod init; +mod integrity; mod learn; mod rewrite; mod session; @@ -27,6 +31,7 @@ use crate::runner::{CommandOutput, CommandRunner}; /// IMPORTANT: Only register subcommands we will actually implement. /// Keep this list exact — no broad patterns. See GRANITE lesson #336. pub(crate) const KNOWN_SUBCOMMANDS: &[&str] = &[ + "agents", "build", "completions", "discover", @@ -272,6 +277,7 @@ pub(crate) fn dispatch(subcommand: &str, args: &[String]) -> anyhow::Result agents::run(args), "build" => build::run(args), "completions" => completions::run(args), "discover" => discover::run(args), diff --git a/crates/rskim/src/cmd/rewrite.rs b/crates/rskim/src/cmd/rewrite.rs index 525298d..03f05e0 100644 --- a/crates/rskim/src/cmd/rewrite.rs +++ b/crates/rskim/src/cmd/rewrite.rs @@ -11,16 +11,18 @@ //! **Layer 2 — Custom handlers**: For commands requiring argument inspection //! (cat, head, tail) where simple prefix matching is insufficient. //! -//! **Hook mode** (`--hook`): Runs as a Claude Code PreToolUse hook. Reads JSON -//! 
from stdin, extracts `tool_input.command`, rewrites if matched, and emits -//! hook-protocol JSON. Never sets `permissionDecision` — skim only sets -//! `updatedInput` and lets Claude Code's permission system evaluate independently. +//! **Hook mode** (`--hook`): Runs as an agent PreToolUse hook via `HookProtocol`. +//! Reads JSON from stdin, extracts the command field (agent-specific), rewrites if +//! matched, and emits agent-specific hook-protocol JSON. Each agent's +//! `format_response()` controls the response shape — see `hooks/` module. use std::io::{self, BufRead, IsTerminal, Read}; use std::process::ExitCode; use serde::Serialize; +use super::session::AgentKind; + // ============================================================================ // Data structures // ============================================================================ @@ -109,29 +111,6 @@ struct SuggestOutput<'a> { skim_hook_version: &'a str, } -// ---- Hook response types (#44) ---- -// SECURITY INVARIANT: No `permissionDecision` field. Skim only sets `updatedInput` -// and lets Claude Code's permission system evaluate independently. 
- -#[derive(Serialize)] -struct HookResponse { - #[serde(rename = "hookSpecificOutput")] - hook_specific_output: HookSpecificOutput, -} - -#[derive(Serialize)] -struct HookSpecificOutput { - #[serde(rename = "hookEventName")] - hook_event_name: String, - #[serde(rename = "updatedInput")] - updated_input: UpdatedInput, -} - -#[derive(Serialize)] -struct UpdatedInput { - command: String, -} - fn serialize_category( cat: &Option, serializer: S, @@ -261,9 +240,11 @@ pub(crate) fn run(args: &[String]) -> anyhow::Result { return Ok(ExitCode::SUCCESS); } - // Hook mode: run as Claude Code PreToolUse hook (#44) + // Hook mode: run as agent PreToolUse hook (#44) if args.iter().any(|a| a == "--hook") { - return run_hook_mode(); + // Parse optional --agent flag + let agent = parse_agent_flag(args); + return run_hook_mode(agent); } // Check for --suggest flag (must be first non-help flag) @@ -311,12 +292,7 @@ pub(crate) fn run(args: &[String]) -> anyhow::Result { if !has_operator_chars { let token_refs: Vec<&str> = tokens.iter().map(|s| s.as_str()).collect(); let result = try_rewrite(&token_refs); - let rewritten = result.as_ref().map(|r| r.tokens.join(" ")); - let match_info = result - .as_ref() - .zip(rewritten.as_ref()) - .map(|(r, s)| (s.as_str(), r.category)); - return emit_result(suggest_mode, &original, match_info, false); + return emit_rewrite_result(suggest_mode, &original, result, false); } // Split into compound segments (or simple if no operators found) @@ -325,21 +301,11 @@ pub(crate) fn run(args: &[String]) -> anyhow::Result { CompoundSplitResult::Simple(simple_tokens) => { let token_refs: Vec<&str> = simple_tokens.iter().map(|s| s.as_str()).collect(); let result = try_rewrite(&token_refs); - let rewritten = result.as_ref().map(|r| r.tokens.join(" ")); - let match_info = result - .as_ref() - .zip(rewritten.as_ref()) - .map(|(r, s)| (s.as_str(), r.category)); - emit_result(suggest_mode, &original, match_info, false) + emit_rewrite_result(suggest_mode, &original, 
result, false) } CompoundSplitResult::Compound(segments) => { let result = try_rewrite_compound(&segments); - let rewritten = result.as_ref().map(|r| r.tokens.join(" ")); - let match_info = result - .as_ref() - .zip(rewritten.as_ref()) - .map(|(r, s)| (s.as_str(), r.category)); - emit_result(suggest_mode, &original, match_info, true) + emit_rewrite_result(suggest_mode, &original, result, true) } } } @@ -368,6 +334,24 @@ fn emit_result( } } +/// Convert a `RewriteResult` into the final output via `emit_result`. +/// +/// Joins the rewrite tokens and extracts the category, bridging the gap +/// between the internal `RewriteResult` type and the `emit_result` API. +fn emit_rewrite_result( + suggest_mode: bool, + original: &str, + result: Option, + compound: bool, +) -> anyhow::Result { + let rewritten = result.as_ref().map(|r| r.tokens.join(" ")); + let match_info = result + .as_ref() + .zip(rewritten.as_ref()) + .map(|(r, s)| (s.as_str(), r.category)); + emit_result(suggest_mode, original, match_info, compound) +} + // ============================================================================ // Core rewrite algorithm // ============================================================================ @@ -1008,11 +992,44 @@ fn try_rewrite_tail(args: &[&str]) -> Option { // Hook mode (#44) — Claude Code PreToolUse integration // ============================================================================ +/// Parse the `--agent ` flag from rewrite args. +/// +/// Returns `None` if `--agent` is not present or the value is missing. +/// Logs a warning for unknown agent names (never errors — hook mode must +/// never fail). Callers default `None` to `AgentKind::ClaudeCode`. 
+fn parse_agent_flag(args: &[String]) -> Option { + let mut i = 0; + while i < args.len() { + if args[i] == "--agent" { + i += 1; + if i < args.len() { + let result = AgentKind::from_str(&args[i]); + if result.is_none() { + super::hook_log::log_hook_warning(&format!( + "unknown --agent value '{}', falling back to claude-code", + &args[i] + )); + } + return result; + } + } + i += 1; + } + None +} + /// Maximum bytes to read from stdin in hook mode (64 KiB). /// Hook payloads are small JSON objects; this prevents unbounded allocation. const HOOK_MAX_STDIN_BYTES: u64 = 64 * 1024; -/// Run as a Claude Code PreToolUse hook. +/// Maximum time (in seconds) a hook invocation is allowed before self-termination. +/// +/// Prevents slow hook processing from hanging the agent indefinitely. +/// The hook exits cleanly (exit 0, empty stdout) on timeout — this is a +/// passthrough, not an error. Logs a warning to hook.log for debugging. +const HOOK_TIMEOUT_SECS: u64 = 5; + +/// Run as an agent PreToolUse hook. /// /// Protocol: /// 1. Read JSON from stdin (bounded) @@ -1022,10 +1039,47 @@ const HOOK_MAX_STDIN_BYTES: u64 = 64 * 1024; /// 5. On match: emit hook response JSON, exit 0 /// 6. On no match: exit 0, empty stdout (passthrough) /// -/// SECURITY INVARIANT: Never sets `permissionDecision`. Only sets `updatedInput`. -fn run_hook_mode() -> anyhow::Result { - // A2: Version mismatch check — rate-limited daily warning - check_hook_version_mismatch(); +/// When `agent` is None or ClaudeCode, uses existing Claude Code logic. +/// Other agents passthrough (exit 0) until Phase 2 adds implementations. +/// +/// SECURITY NOTE: Response shape is agent-specific — see each agent's +/// `format_response()` in `hooks/`. Claude Code never sets `permissionDecision`; +/// Copilot uses `permissionDecision: deny` (deny-with-suggestion pattern). 
+fn run_hook_mode(agent: Option<AgentKind>) -> anyhow::Result<ExitCode> {
+    use super::hooks::{protocol_for_agent, HookSupport};
+
+    // Watchdog: self-terminate after HOOK_TIMEOUT_SECS to prevent hanging the agent.
+    // Uses a detached thread so it doesn't interfere with normal processing.
+    // On timeout: log warning, exit 0 (passthrough — agent sees empty stdout).
+    std::thread::spawn(|| {
+        std::thread::sleep(std::time::Duration::from_secs(HOOK_TIMEOUT_SECS));
+        super::hook_log::log_hook_warning("hook processing timed out after 5s, exiting");
+        // SAFETY: process::exit(0) is intentional here. In hook mode, timeout means
+        // passthrough (the agent sees empty stdout and proceeds normally). No Drop-based
+        // cleanup is relied upon — all writes use explicit flush before this point, and
+        // the watchdog only fires when processing has stalled beyond the timeout window.
+        std::process::exit(0);
+    });
+
+    let agent_kind = agent.unwrap_or(AgentKind::ClaudeCode);
+    let protocol = protocol_for_agent(agent_kind);
+
+    // AwarenessOnly agents (Codex, OpenCode) have no hook mechanism — passthrough immediately
+    if protocol.hook_support() == HookSupport::AwarenessOnly {
+        return Ok(ExitCode::SUCCESS);
+    }
+
+    // #57: Integrity check — log-only (NEVER stderr, GRANITE #361 Bug 3).
+    // Only run for Claude Code where we have the hook script infrastructure.
+    // TODO: Extend integrity checks to Cursor, Gemini, and Copilot once their
+    // hook script install paths are validated (they also report RealHook support).
+ if agent_kind == AgentKind::ClaudeCode { + let integrity_failed = check_hook_integrity(agent_kind); + if !integrity_failed { + // A2: Version mismatch check — rate-limited daily warning + check_hook_version_mismatch(agent_kind); + } + } // Read stdin (bounded) let mut stdin_buf = String::new(); @@ -1048,16 +1102,12 @@ fn run_hook_mode() -> anyhow::Result { } }; - // Extract tool_input.command - let command = match json - .get("tool_input") - .and_then(|ti| ti.get("command")) - .and_then(|c| c.as_str()) - { - Some(cmd) => cmd.to_string(), + // Extract command using the agent-specific protocol + let command = match protocol.parse_input(&json) { + Some(input) => input.command, None => { audit_hook("", false, ""); - return Ok(ExitCode::SUCCESS); // passthrough on missing field + return Ok(ExitCode::SUCCESS); // passthrough on missing/unparseable field } }; @@ -1102,16 +1152,8 @@ fn run_hook_mode() -> anyhow::Result { match rewritten { Some(ref rewritten_cmd) => { audit_hook(&command, true, rewritten_cmd); - let response = HookResponse { - hook_specific_output: HookSpecificOutput { - hook_event_name: "PreToolUse".to_string(), - updated_input: UpdatedInput { - command: rewritten_cmd.clone(), - }, - }, - }; - // Struct contains only String fields -- serialization is infallible in practice, - // but we propagate the error rather than panicking in the hook path. + // Use agent-specific response format + let response = protocol.format_response(rewritten_cmd); let json_out = serde_json::to_string(&response)?; println!("{json_out}"); } @@ -1123,11 +1165,80 @@ fn run_hook_mode() -> anyhow::Result { Ok(ExitCode::SUCCESS) } +/// Resolve the hook config directory for the given agent. +/// +/// Delegates to the canonical `resolve_config_dir_for_agent` in `init/helpers.rs` +/// which handles agent-specific env overrides and home-directory fallback. 
+fn resolve_hook_config_dir(agent: AgentKind) -> Option<std::path::PathBuf> {
+    super::init::resolve_config_dir_for_agent(false, agent).ok()
+}
+
+/// Check if a daily rate-limit stamp allows warning today.
+/// Returns `true` if caller should emit warning, `false` if already warned today.
+/// Updates the stamp file as a side effect.
+fn should_warn_today(stamp_path: &std::path::Path) -> bool {
+    let today = today_date_string();
+    if let Ok(contents) = std::fs::read_to_string(stamp_path) {
+        if contents.trim() == today {
+            return false;
+        }
+    }
+    let _ = std::fs::create_dir_all(stamp_path.parent().unwrap_or(std::path::Path::new(".")));
+    let _ = std::fs::write(stamp_path, &today);
+    true
+}
+
+/// #57: Check hook script integrity.
+///
+/// Uses SHA-256 hash verification. Warnings go to log file only (NEVER
+/// stderr). Returns `true` if integrity check failed (tampered), `false`
+/// if valid, missing, or check was skipped.
+fn check_hook_integrity(agent: AgentKind) -> bool {
+    let config_dir = match resolve_hook_config_dir(agent) {
+        Some(dir) => dir,
+        None => return false,
+    };
+
+    let agent_name = agent.cli_name();
+    let script_path = config_dir.join("hooks").join("skim-rewrite.sh");
+
+    if !script_path.exists() {
+        return false;
+    }
+
+    match super::integrity::verify_script_integrity(&config_dir, agent_name, &script_path) {
+        Ok(true) => false, // Valid or missing hash (backward compat)
+        Ok(false) => {
+            // Tampered! Log warning to file (NEVER stderr).
+            // Rate-limit: per-agent daily stamp to avoid log spam.
+ let stamp_path = match cache_dir() { + Some(dir) => dir.join(format!(".hook-integrity-warned-{agent_name}")), + None => { + super::hook_log::log_hook_warning(&format!( + "hook script tampered: {}", + script_path.display() + )); + return true; + } + }; + + if should_warn_today(&stamp_path) { + super::hook_log::log_hook_warning(&format!( + "hook script tampered: {} (run `skim init --yes` to reinstall)", + script_path.display() + )); + } + true + } + Err(_) => false, // Script unreadable — don't block the hook + } +} + /// A2: Check for version mismatch between hook script and binary. /// /// If `SKIM_HOOK_VERSION` is set and differs from the compiled version, -/// emit a daily warning to stderr. Rate-limited via stamp file. -fn check_hook_version_mismatch() { +/// emit a daily warning to hook.log. Rate-limited via per-agent stamp file. +fn check_hook_version_mismatch(agent: AgentKind) { let hook_version = match std::env::var("SKIM_HOOK_VERSION") { Ok(v) => v, Err(_) => return, // not set — nothing to check @@ -1138,40 +1249,35 @@ fn check_hook_version_mismatch() { return; // versions match } - // Rate limit: warn at most once per day + let agent_name = agent.cli_name(); + + // Rate limit: per-agent, warn at most once per day let stamp_path = match cache_dir() { - Some(dir) => dir.join(".hook-version-warned"), + Some(dir) => dir.join(format!(".hook-version-warned-{agent_name}")), None => return, }; - let today = today_date_string(); - - // Check if we already warned today - if let Ok(contents) = std::fs::read_to_string(&stamp_path) { - if contents.trim() == today { - return; // already warned today - } + if should_warn_today(&stamp_path) { + // Emit warning to hook log (NEVER stderr -- GRANITE #361 Bug 3) + super::hook_log::log_hook_warning(&format!( + "version mismatch: hook script v{hook_version}, binary v{compiled_version} (run `skim init --yes` to update)" + )); } - - // Emit warning - eprintln!( - "warning: skim hook version mismatch (hook script: 
v{hook_version}, binary: v{compiled_version})" - ); - eprintln!("hint: run `skim init --yes` to update the hook script"); - - // Update stamp file (best-effort) - let _ = std::fs::create_dir_all(stamp_path.parent().unwrap_or(std::path::Path::new("."))); - let _ = std::fs::write(&stamp_path, &today); } -/// Maximum audit log size before truncation (10 MiB). +/// Maximum audit log size before rotation (10 MiB). const AUDIT_LOG_MAX_BYTES: u64 = 10 * 1024 * 1024; +/// Maximum number of audit log archive files to keep. +const AUDIT_LOG_MAX_ARCHIVES: u32 = 3; + /// A3: Audit logging for hook invocations. /// /// When `SKIM_HOOK_AUDIT=1`, appends a JSON line to `~/.cache/skim/hook-audit.log`. -/// The log is truncated when it exceeds [`AUDIT_LOG_MAX_BYTES`] to prevent unbounded -/// disk growth. Failures are silently ignored (never break the hook). +/// The log is rotated when it exceeds [`AUDIT_LOG_MAX_BYTES`] to prevent unbounded +/// disk growth. Rotation uses the same shift scheme as `hook_log.rs`: +/// delete `.3`, rename `.2` -> `.3`, `.1` -> `.2`, current -> `.1`. +/// Failures are silently ignored (never break the hook). fn audit_hook(original: &str, matched: bool, rewritten: &str) { if std::env::var("SKIM_HOOK_AUDIT").as_deref() != Ok("1") { return; @@ -1182,10 +1288,17 @@ fn audit_hook(original: &str, matched: bool, rewritten: &str) { None => return, }; - // Truncate if the log exceeds the size limit (best-effort) + // Rotate if the log exceeds the size limit (best-effort). + // Shift scheme: delete .3, rename .2 -> .3, .1 -> .2, current -> .1. 
if let Ok(meta) = std::fs::metadata(&log_path) { if meta.len() >= AUDIT_LOG_MAX_BYTES { - let _ = std::fs::write(&log_path, b""); + for i in (1..AUDIT_LOG_MAX_ARCHIVES).rev() { + let from = audit_archive_path(&log_path, i); + let to = audit_archive_path(&log_path, i + 1); + let _ = std::fs::rename(&from, &to); + } + let archive_1 = audit_archive_path(&log_path, 1); + let _ = std::fs::rename(&log_path, &archive_1); } } @@ -1209,12 +1322,17 @@ fn audit_hook(original: &str, matched: bool, rewritten: &str) { } } -/// Get the skim cache directory, respecting platform conventions and `$XDG_CACHE_HOME`. -/// -/// Uses `dirs::cache_dir()` (which respects `$XDG_CACHE_HOME` on Linux) rather -/// than hardcoding `~/.cache/`, consistent with `crate::cache::get_cache_dir()`. +/// Build the path for an audit log archive file (e.g., `hook-audit.log.1`). +fn audit_archive_path(log_path: &std::path::Path, index: u32) -> std::path::PathBuf { + let mut path = log_path.as_os_str().to_owned(); + path.push(format!(".{index}")); + std::path::PathBuf::from(path) +} + +/// Re-export `cache_dir` from `hook_log` to avoid duplication. +/// See `hook_log::cache_dir` for full documentation. fn cache_dir() -> Option { - dirs::cache_dir().map(|c| c.join("skim")) + super::hook_log::cache_dir() } /// Get today's date as YYYY-MM-DD string. @@ -1228,26 +1346,10 @@ fn today_date_string() -> String { // Convert to days since epoch, then to date components let days = secs / 86400; // Simple date calculation (good enough for stamp file purposes) - let (year, month, day) = days_to_date(days); + let (year, month, day) = super::hook_log::days_to_date(days); format!("{year:04}-{month:02}-{day:02}") } -/// Convert days since Unix epoch to (year, month, day). 
-fn days_to_date(days_since_epoch: u64) -> (u64, u64, u64) { - // Algorithm from http://howardhinnant.github.io/date_algorithms.html - let z = days_since_epoch + 719468; - let era = z / 146097; - let doe = z - era * 146097; - let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365; - let y = yoe + era * 400; - let doy = doe - (365 * yoe + yoe / 4 - yoe / 100); - let mp = (5 * doy + 2) / 153; - let d = doy - (153 * mp + 2) / 5 + 1; - let m = if mp < 10 { mp + 3 } else { mp - 9 }; - let y = if m <= 2 { y + 1 } else { y }; - (y, m, d) -} - // ============================================================================ // Suggest mode output // ============================================================================ @@ -1261,7 +1363,7 @@ fn print_suggest(original: &str, result: Option<(&str, RewriteCategory)>, compou category: result.map(|(_, c)| c), confidence: if result.is_some() { "exact" } else { "" }, compound, - skim_hook_version: "1.0.0", + skim_hook_version: env!("CARGO_PKG_VERSION"), }; // Struct contains only primitive types (&str, u8, bool) — serialization cannot fail. 
let json = serde_json::to_string(&output) @@ -1290,7 +1392,13 @@ pub(super) fn command() -> clap::Command { clap::Arg::new("hook") .long("hook") .action(clap::ArgAction::SetTrue) - .help("Run as Claude Code PreToolUse hook (reads JSON from stdin)"), + .help("Run as agent PreToolUse hook (reads JSON from stdin)"), + ) + .arg( + clap::Arg::new("agent") + .long("agent") + .value_name("NAME") + .help("Agent type for hook mode (e.g., claude-code, codex, gemini)"), ) .arg( clap::Arg::new("command") @@ -1311,12 +1419,13 @@ fn print_help() { println!(); println!("Usage: skim rewrite [--suggest] ..."); println!(" echo \"cargo test\" | skim rewrite [--suggest]"); - println!(" skim rewrite --hook (Claude Code PreToolUse hook mode)"); + println!(" skim rewrite --hook (agent PreToolUse hook mode)"); println!(); println!("Options:"); - println!(" --suggest Output JSON suggestion instead of plain text"); - println!(" --hook Run as Claude Code PreToolUse hook (reads JSON from stdin)"); - println!(" --help, -h Print help information"); + println!(" --suggest Output JSON suggestion instead of plain text"); + println!(" --hook Run as agent PreToolUse hook (reads JSON from stdin)"); + println!(" --agent Agent type for hook mode (default: claude-code)"); + println!(" --help, -h Print help information"); println!(); println!("Examples:"); println!(" skim rewrite cargo test -- --nocapture"); @@ -1325,8 +1434,8 @@ fn print_help() { println!(" echo \"pytest -v\" | skim rewrite --suggest"); println!(); println!("Hook mode:"); - println!(" Reads Claude Code PreToolUse JSON from stdin, rewrites command if"); - println!(" matched, and emits hook-protocol JSON. 
Never sets permissionDecision."); + println!(" Reads agent PreToolUse JSON from stdin, rewrites command if matched,"); + println!(" and emits agent-specific hook-protocol JSON (see --agent flag)."); println!(); println!("Exit codes:"); println!(" 0 Rewrite found (or --suggest/--hook mode)"); @@ -2423,4 +2532,114 @@ mod tests { other => panic!("Expected Bail for variable expansion, got {:?}", other), } } + + // ======================================================================== + // parse_agent_flag + // ======================================================================== + + #[test] + fn test_parse_agent_flag_present() { + let args = vec![ + "--hook".to_string(), + "--agent".to_string(), + "claude-code".to_string(), + ]; + assert_eq!(parse_agent_flag(&args), Some(AgentKind::ClaudeCode)); + } + + #[test] + fn test_parse_agent_flag_codex() { + let args = vec![ + "--hook".to_string(), + "--agent".to_string(), + "codex".to_string(), + ]; + assert_eq!(parse_agent_flag(&args), Some(AgentKind::CodexCli)); + } + + #[test] + fn test_parse_agent_flag_absent() { + let args = vec!["--hook".to_string()]; + assert_eq!(parse_agent_flag(&args), None); + } + + #[test] + fn test_parse_agent_flag_missing_value() { + let args = vec!["--hook".to_string(), "--agent".to_string()]; + assert_eq!(parse_agent_flag(&args), None); + } + + #[test] + fn test_parse_agent_flag_unknown_agent() { + let args = vec![ + "--hook".to_string(), + "--agent".to_string(), + "unknown-agent".to_string(), + ]; + assert_eq!(parse_agent_flag(&args), None); + } + + // ======================================================================== + // Hook timeout constant + // ======================================================================== + + #[test] + fn test_hook_timeout_constant() { + assert_eq!( + HOOK_TIMEOUT_SECS, 5, + "Hook timeout must be 5 seconds (Claude Code hook timeout is 5s)" + ); + } + + #[test] + fn test_hook_max_stdin_bytes_constant() { + assert_eq!( + HOOK_MAX_STDIN_BYTES, + 64 * 
1024, + "Hook max stdin must be 64 KiB" + ); + } + + // ======================================================================== + // should_warn_today rate-limit helper (TD-4) + // ======================================================================== + + #[test] + fn test_should_warn_today_no_stamp() { + let dir = tempfile::TempDir::new().unwrap(); + let stamp = dir.path().join("stamp"); + assert!( + should_warn_today(&stamp), + "should warn when no stamp exists" + ); + assert!(stamp.exists(), "stamp file should be created"); + } + + #[test] + fn test_should_warn_today_same_day() { + let dir = tempfile::TempDir::new().unwrap(); + let stamp = dir.path().join("stamp"); + std::fs::write(&stamp, today_date_string()).unwrap(); + assert!( + !should_warn_today(&stamp), + "should not warn when stamp is today" + ); + } + + #[test] + fn test_should_warn_today_stale_stamp() { + let dir = tempfile::TempDir::new().unwrap(); + let stamp = dir.path().join("stamp"); + std::fs::write(&stamp, "2020-01-01").unwrap(); + assert!( + should_warn_today(&stamp), + "should warn when stamp is from a different day" + ); + let updated = std::fs::read_to_string(&stamp).unwrap(); + assert_eq!( + updated.trim(), + today_date_string(), + "stamp should be updated to today" + ); + } } diff --git a/crates/rskim/src/cmd/session/claude.rs b/crates/rskim/src/cmd/session/claude.rs index 4bb0d03..c70f94d 100644 --- a/crates/rskim/src/cmd/session/claude.rs +++ b/crates/rskim/src/cmd/session/claude.rs @@ -5,9 +5,12 @@ use std::collections::HashMap; use std::path::PathBuf; -use super::types::*; +use super::types::{AgentKind, SessionFile, TimeFilter, ToolInput, ToolInvocation, ToolResult}; use super::SessionProvider; +/// Maximum session file size: 100 MB. +const MAX_SESSION_SIZE: u64 = 100 * 1024 * 1024; + /// Claude Code session file provider. 
pub(crate) struct ClaudeCodeProvider { projects_dir: PathBuf, @@ -21,7 +24,9 @@ impl ClaudeCodeProvider { let projects_dir = if let Ok(override_dir) = std::env::var("SKIM_PROJECTS_DIR") { PathBuf::from(override_dir) } else { - dirs::home_dir()?.join(".claude").join("projects") + AgentKind::ClaudeCode + .config_dir(&dirs::home_dir()?) + .join("projects") }; if projects_dir.is_dir() { @@ -119,7 +124,6 @@ impl SessionProvider for ClaudeCodeProvider { fn parse_session(&self, file: &SessionFile) -> anyhow::Result> { // Guard against unbounded reads -- reject files over 100 MB - const MAX_SESSION_SIZE: u64 = 100 * 1024 * 1024; let file_size = std::fs::metadata(&file.path)?.len(); if file_size > MAX_SESSION_SIZE { anyhow::bail!( diff --git a/crates/rskim/src/cmd/session/codex.rs b/crates/rskim/src/cmd/session/codex.rs new file mode 100644 index 0000000..938f483 --- /dev/null +++ b/crates/rskim/src/cmd/session/codex.rs @@ -0,0 +1,485 @@ +//! Codex CLI session provider. +//! +//! Parses Codex CLI event-stream JSONL session files from `~/.codex/sessions/`. +//! Directory structure: `YYYY/MM/DD/rollout-*.jsonl`. + +use std::collections::HashMap; +use std::path::PathBuf; + +use super::types::{AgentKind, SessionFile, TimeFilter, ToolInput, ToolInvocation, ToolResult}; +use super::SessionProvider; + +/// Maximum session file size: 100 MB. +const MAX_SESSION_SIZE: u64 = 100 * 1024 * 1024; + +/// Codex CLI session file provider. +pub(crate) struct CodexCliProvider { + sessions_dir: PathBuf, +} + +impl CodexCliProvider { + /// Detect Codex CLI by checking if the sessions directory exists. + /// + /// Uses `SKIM_CODEX_SESSIONS_DIR` env var override for testability. + pub(crate) fn detect() -> Option { + let sessions_dir = if let Ok(override_dir) = std::env::var("SKIM_CODEX_SESSIONS_DIR") { + PathBuf::from(override_dir) + } else { + AgentKind::CodexCli + .config_dir(&dirs::home_dir()?) 
+ .join("sessions") + }; + + if sessions_dir.is_dir() { + Some(Self { sessions_dir }) + } else { + None + } + } +} + +/// Depth of the Codex YYYY/MM/DD/files directory structure. +const CODEX_DIR_DEPTH: usize = 4; + +/// Recursively collect `rollout-*.jsonl` files from the YYYY/MM/DD directory structure. +/// +/// At `depth < CODEX_DIR_DEPTH`, recurses into subdirectories. +/// At `depth == CODEX_DIR_DEPTH`, collects matching files with symlink guard. +fn collect_codex_files( + dir: &std::path::Path, + depth: usize, + canonical_root: &std::path::Path, +) -> Vec<(PathBuf, std::time::SystemTime)> { + let entries = match std::fs::read_dir(dir) { + Ok(entries) => entries, + Err(_) => return Vec::new(), + }; + + let mut results = Vec::new(); + for entry in entries.flatten() { + let path = entry.path(); + + if depth < CODEX_DIR_DEPTH { + // Intermediate level — recurse into subdirectories only + if path.is_dir() { + results.extend(collect_codex_files(&path, depth + 1, canonical_root)); + } + } else { + // Leaf level — collect rollout-*.jsonl files + let file_name = match path.file_name().and_then(|n| n.to_str()) { + Some(name) => name, + None => continue, + }; + if !file_name.starts_with("rollout-") + || path.extension().and_then(|e| e.to_str()) != Some("jsonl") + { + continue; + } + + // Symlink traversal guard + if let Ok(canonical_path) = path.canonicalize() { + if !canonical_path.starts_with(canonical_root) { + continue; + } + } + + if let Ok(modified) = std::fs::metadata(&path).and_then(|m| m.modified()) { + results.push((path, modified)); + } + } + } + results +} + +impl SessionProvider for CodexCliProvider { + fn agent_kind(&self) -> AgentKind { + AgentKind::CodexCli + } + + fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result> { + if !self.sessions_dir.is_dir() { + return Ok(Vec::new()); + } + + // Canonicalize sessions_dir to prevent symlink traversal outside boundary + let canonical_root = self + .sessions_dir + .canonicalize() + .unwrap_or_else(|_| 
self.sessions_dir.clone()); + + // Collect all matching files from YYYY/MM/DD structure + let files = collect_codex_files(&self.sessions_dir, 1, &canonical_root); + + // Filter by time, map to SessionFile, sort, truncate + let mut sessions: Vec = files + .into_iter() + .filter(|(_, modified)| filter.since.is_none_or(|since| *modified >= since)) + .map(|(path, modified)| { + let session_id = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + SessionFile { + path, + modified, + agent: AgentKind::CodexCli, + session_id, + } + }) + .collect(); + + // Sort by modification time (newest first) + sessions.sort_by(|a, b| b.modified.cmp(&a.modified)); + + // Apply latest_only filter + if filter.latest_only { + sessions.truncate(1); + } + + Ok(sessions) + } + + fn parse_session(&self, file: &SessionFile) -> anyhow::Result> { + // Guard against unbounded reads -- reject files over 100 MB + let file_size = std::fs::metadata(&file.path)?.len(); + if file_size > MAX_SESSION_SIZE { + anyhow::bail!( + "session file too large ({:.1} MB, limit {:.0} MB): {}", + file_size as f64 / (1024.0 * 1024.0), + MAX_SESSION_SIZE as f64 / (1024.0 * 1024.0), + file.path.display() + ); + } + + let content = std::fs::read_to_string(&file.path)?; + parse_codex_jsonl(&content, &file.session_id) + } +} + +/// Parse Codex CLI JSONL content into tool invocations. +/// +/// Correlates `codex.tool_decision` events with `codex.tool_result` events +/// by matching `tool_decision_id` fields. 
+fn parse_codex_jsonl(content: &str, session_id: &str) -> anyhow::Result> { + let mut invocations = Vec::new(); + // Map from tool_decision_id to index in invocations vec for result correlation + let mut pending: HashMap = HashMap::new(); + + for line in content.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + let json: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => continue, // skip malformed lines gracefully + }; + + let event_type = json.get("type").and_then(|t| t.as_str()).unwrap_or(""); + let timestamp = json + .get("timestamp") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + + match event_type { + "codex.tool_decision" => { + let tool_name = json + .get("tool") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + let args = json.get("args").cloned().unwrap_or(serde_json::Value::Null); + let tool_decision_id = json + .get("tool_decision_id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + + let input = parse_codex_tool_input(&tool_name, &args); + + let idx = invocations.len(); + invocations.push(ToolInvocation { + tool_name: tool_name.clone(), + input, + timestamp, + session_id: session_id.to_string(), + agent: AgentKind::CodexCli, + result: None, + }); + + if !tool_decision_id.is_empty() { + pending.insert(tool_decision_id, idx); + } + } + "codex.tool_result" => { + let tool_decision_id = json + .get("tool_decision_id") + .and_then(|id| id.as_str()) + .unwrap_or(""); + + if let Some(&idx) = pending.get(tool_decision_id) { + let result_content = json + .get("result") + .and_then(|r| r.get("content")) + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + let is_error = json + .get("result") + .and_then(|r| r.get("is_error")) + .and_then(|e| e.as_bool()) + .unwrap_or(false); + + invocations[idx].result = Some(ToolResult { + content: result_content, + is_error, + }); + pending.remove(tool_decision_id); + } + } + _ => {} // skip unknown event types + 
} + } + + Ok(invocations) +} + +/// Map Codex CLI tool names to normalized ToolInput enum. +/// +/// Codex uses lowercase tool names: "bash", "read", "write", "edit", "glob", "grep". +fn parse_codex_tool_input(tool_name: &str, args: &serde_json::Value) -> ToolInput { + match tool_name { + "bash" => { + let command = args + .get("command") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Bash { command } + } + "read" => { + let file_path = args + .get("file_path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Read { file_path } + } + "write" => { + let file_path = args + .get("file_path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Write { file_path } + } + "edit" => { + let file_path = args + .get("file_path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Edit { file_path } + } + "glob" => { + let pattern = args + .get("pattern") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Glob { pattern } + } + "grep" => { + let pattern = args + .get("pattern") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Grep { pattern } + } + _ => ToolInput::Other { + tool_name: tool_name.to_string(), + raw: args.clone(), + }, + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_tool_decision_bash() { + let jsonl = r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"cargo test"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-abc","tool_decision_id":"td-001"}"#; + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "bash"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "cargo 
test" + )); + assert_eq!(invocations[0].agent, AgentKind::CodexCli); + assert_eq!(invocations[0].timestamp, "2026-03-01T10:00:00Z"); + } + + #[test] + fn test_parse_tool_decision_read() { + let jsonl = r#"{"type":"codex.tool_decision","tool":"read","args":{"file_path":"/tmp/main.rs"},"timestamp":"2026-03-01T10:00:02Z","session_id":"sess-abc","tool_decision_id":"td-002"}"#; + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "read"); + assert!(matches!( + &invocations[0].input, + ToolInput::Read { file_path } if file_path == "/tmp/main.rs" + )); + } + + #[test] + fn test_correlate_tool_result() { + let jsonl = concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"cargo test"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-abc","tool_decision_id":"td-001"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"test result: ok","is_error":false},"timestamp":"2026-03-01T10:00:01Z","session_id":"sess-abc","tool_decision_id":"td-001"}"# + ); + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.is_some()); + let result = invocations[0].result.as_ref().unwrap(); + assert_eq!(result.content, "test result: ok"); + assert!(!result.is_error); + } + + #[test] + fn test_skip_malformed_lines() { + let jsonl = "not json\n{}\n"; + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 0); + } + + #[test] + fn test_empty_input() { + let invocations = parse_codex_jsonl("", "sess-abc").unwrap(); + assert_eq!(invocations.len(), 0); + } + + #[test] + fn test_multiple_tools_in_session() { + let jsonl = concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"cargo test"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-abc","tool_decision_id":"td-001"}"#, + "\n", + 
r#"{"type":"codex.tool_decision","tool":"read","args":{"file_path":"/tmp/main.rs"},"timestamp":"2026-03-01T10:00:02Z","session_id":"sess-abc","tool_decision_id":"td-002"}"#, + "\n", + r#"{"type":"codex.tool_decision","tool":"write","args":{"file_path":"/tmp/out.rs"},"timestamp":"2026-03-01T10:00:04Z","session_id":"sess-abc","tool_decision_id":"td-003"}"# + ); + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 3); + assert_eq!(invocations[0].tool_name, "bash"); + assert_eq!(invocations[1].tool_name, "read"); + assert_eq!(invocations[2].tool_name, "write"); + } + + #[test] + fn test_tool_result_with_error() { + let jsonl = concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"git diff"},"timestamp":"2026-03-01T10:00:04Z","session_id":"sess-abc","tool_decision_id":"td-003"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"error: not a git repository","is_error":true},"timestamp":"2026-03-01T10:00:05Z","session_id":"sess-abc","tool_decision_id":"td-003"}"# + ); + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.is_some()); + let result = invocations[0].result.as_ref().unwrap(); + assert_eq!(result.content, "error: not a git repository"); + assert!(result.is_error); + } + + #[test] + fn test_uncorrelated_result_ignored() { + // A tool_result with no matching tool_decision should not crash + let jsonl = r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"orphan","is_error":false},"timestamp":"2026-03-01T10:00:05Z","session_id":"sess-abc","tool_decision_id":"td-999"}"#; + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 0); + } + + #[test] + fn test_parse_codex_tool_input_variants() { + let write_args = serde_json::json!({"file_path": "/tmp/out.rs"}); + let result = parse_codex_tool_input("write", &write_args); + 
assert!(matches!(result, ToolInput::Write { file_path } if file_path == "/tmp/out.rs")); + + let edit_args = serde_json::json!({"file_path": "/tmp/edit.rs"}); + let result = parse_codex_tool_input("edit", &edit_args); + assert!(matches!(result, ToolInput::Edit { file_path } if file_path == "/tmp/edit.rs")); + + let glob_args = serde_json::json!({"pattern": "**/*.rs"}); + let result = parse_codex_tool_input("glob", &glob_args); + assert!(matches!(result, ToolInput::Glob { pattern } if pattern == "**/*.rs")); + + let grep_args = serde_json::json!({"pattern": "fn main"}); + let result = parse_codex_tool_input("grep", &grep_args); + assert!(matches!(result, ToolInput::Grep { pattern } if pattern == "fn main")); + + let other_args = serde_json::json!({"foo": "bar"}); + let result = parse_codex_tool_input("unknown_tool", &other_args); + assert!( + matches!(result, ToolInput::Other { tool_name, .. } if tool_name == "unknown_tool") + ); + } + + #[test] + fn test_decision_without_id_skips_correlation() { + // A tool_decision without tool_decision_id should still be parsed, + // but results won't correlate (the empty-string key won't match). 
+ let jsonl = concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"echo hi"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-abc"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"hi","is_error":false},"timestamp":"2026-03-01T10:00:01Z","session_id":"sess-abc","tool_decision_id":"td-001"}"# + ); + let invocations = parse_codex_jsonl(jsonl, "sess-abc").unwrap(); + assert_eq!(invocations.len(), 1); + // Result should NOT be correlated since the decision had no tool_decision_id + // (empty string key won't match "td-001") + assert!(invocations[0].result.is_none()); + } + + // ======================================================================== + // collect_codex_files recursive helper (TD-1) + // ======================================================================== + + #[test] + fn test_collect_codex_files_date_structure() { + let dir = tempfile::TempDir::new().unwrap(); + // Canonicalize to handle macOS /var -> /private/var symlink + let root = dir.path().canonicalize().unwrap(); + // Create YYYY/MM/DD structure with a rollout file + let day_dir = root.join("2026").join("03").join("26"); + std::fs::create_dir_all(&day_dir).unwrap(); + std::fs::write(day_dir.join("rollout-abc.jsonl"), "{}").unwrap(); + // Also add a non-matching file + std::fs::write(day_dir.join("other.txt"), "nope").unwrap(); + + let files = collect_codex_files(&root, 1, &root); + assert_eq!(files.len(), 1); + assert!(files[0].0.ends_with("rollout-abc.jsonl")); + } + + #[test] + fn test_collect_codex_files_ignores_wrong_depth() { + let dir = tempfile::TempDir::new().unwrap(); + let root = dir.path().canonicalize().unwrap(); + // File at depth 2 (YYYY/rollout-*.jsonl) — should NOT be collected + let year_dir = root.join("2026"); + std::fs::create_dir_all(&year_dir).unwrap(); + std::fs::write(year_dir.join("rollout-orphan.jsonl"), "{}").unwrap(); + + let files = collect_codex_files(&root, 1, &root); + assert!(files.is_empty(), "files 
at wrong depth should be ignored"); + } +} diff --git a/crates/rskim/src/cmd/session/copilot.rs b/crates/rskim/src/cmd/session/copilot.rs new file mode 100644 index 0000000..dee5aca --- /dev/null +++ b/crates/rskim/src/cmd/session/copilot.rs @@ -0,0 +1,554 @@ +//! Copilot CLI session provider. +//! +//! Parses Copilot CLI timeline JSONL session files from `~/.copilot/session-state/`. +//! Session files may optionally contain a YAML metadata header (delimited by `---`) +//! followed by JSONL tool events. + +use std::collections::HashMap; +use std::path::PathBuf; + +use super::types::{AgentKind, SessionFile, TimeFilter, ToolInput, ToolInvocation, ToolResult}; +use super::SessionProvider; + +/// Maximum session file size: 100 MB. +const MAX_SESSION_SIZE: u64 = 100 * 1024 * 1024; + +/// Copilot CLI session file provider. +pub(crate) struct CopilotCliProvider { + sessions_dir: PathBuf, +} + +impl CopilotCliProvider { + /// Detect Copilot CLI by checking if the session directory exists. + /// + /// Uses `SKIM_COPILOT_DIR` env var override for testability. 
+ pub(crate) fn detect() -> Option { + let sessions_dir = if let Ok(override_dir) = std::env::var("SKIM_COPILOT_DIR") { + PathBuf::from(override_dir) + } else { + dirs::home_dir()?.join(".copilot").join("session-state") + }; + + if sessions_dir.is_dir() { + Some(Self { sessions_dir }) + } else { + None + } + } +} + +impl SessionProvider for CopilotCliProvider { + fn agent_kind(&self) -> AgentKind { + AgentKind::CopilotCli + } + + fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result> { + let mut sessions = Vec::new(); + + // Canonicalize sessions_dir to prevent symlink traversal outside boundary + let canonical_root = self + .sessions_dir + .canonicalize() + .unwrap_or_else(|_| self.sessions_dir.clone()); + + let entries = std::fs::read_dir(&self.sessions_dir)?; + for entry in entries.flatten() { + let path = entry.path(); + + // Accept .jsonl files + if path.extension().and_then(|e| e.to_str()) != Some("jsonl") { + continue; + } + + // Verify resolved path stays within the session directory (symlink traversal guard) + if let Ok(canonical_path) = path.canonicalize() { + if !canonical_path.starts_with(&canonical_root) { + eprintln!( + "warning: skipping file outside session dir: {}", + path.display() + ); + continue; + } + } + + let modified = match std::fs::metadata(&path).and_then(|m| m.modified()) { + Ok(t) => t, + Err(e) => { + eprintln!( + "warning: could not read metadata for {}: {}", + path.display(), + e + ); + continue; + } + }; + + // Apply time filter + if let Some(since) = filter.since { + if modified < since { + continue; + } + } + + let session_id = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + + sessions.push(SessionFile { + path, + modified, + agent: AgentKind::CopilotCli, + session_id, + }); + } + + // Sort by modification time (newest first) + sessions.sort_by(|a, b| b.modified.cmp(&a.modified)); + + // Apply latest_only filter + if filter.latest_only { + sessions.truncate(1); + } + + 
Ok(sessions) + } + + fn parse_session(&self, file: &SessionFile) -> anyhow::Result> { + // Guard against unbounded reads -- reject files over 100 MB + let file_size = std::fs::metadata(&file.path)?.len(); + if file_size > MAX_SESSION_SIZE { + anyhow::bail!( + "session file too large ({:.1} MB, limit {:.0} MB): {}", + file_size as f64 / (1024.0 * 1024.0), + MAX_SESSION_SIZE as f64 / (1024.0 * 1024.0), + file.path.display() + ); + } + + let content = std::fs::read_to_string(&file.path)?; + parse_copilot_jsonl(&content, &file.session_id) + } +} + +/// Skip optional YAML header, returning only the JSONL body. +/// +/// If the first non-empty line is `---`, scans forward until the closing +/// `---` delimiter and returns the content after it. Otherwise returns +/// the original content unchanged. +fn skip_yaml_header(content: &str) -> &str { + let trimmed = content.trim_start(); + if !trimmed.starts_with("---") { + return content; + } + + // Find the first `---` line + let after_first = match trimmed.strip_prefix("---") { + Some(rest) => rest.trim_start_matches(['\r', ' ', '\t']), + None => return content, + }; + + // Skip leading newline after first --- + let after_first = after_first.strip_prefix('\n').unwrap_or(after_first); + + // Find the closing `---` + if let Some(end_idx) = after_first.find("\n---") { + let rest_start = end_idx + 4; // skip "\n---" + if rest_start < after_first.len() { + &after_first[rest_start..] + } else { + "" + } + } else { + // No closing `---` found; treat entire content as JSONL (no valid header) + content + } +} + +/// Parse Copilot CLI JSONL content into tool invocations. 
+/// +/// Handles optional YAML header, then parses timeline events: +/// - `tool_use` events create invocations +/// - `tool_result` events are correlated by `toolUseId` -> `id` +fn parse_copilot_jsonl(content: &str, session_id: &str) -> anyhow::Result> { + let jsonl_body = skip_yaml_header(content); + + let mut invocations = Vec::new(); + // Map from tool id to index in invocations vec for result correlation + let mut pending: HashMap = HashMap::new(); + + for line in jsonl_body.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + let json: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => continue, // skip malformed lines + }; + + let event_type = json.get("type").and_then(|t| t.as_str()).unwrap_or(""); + let timestamp = json + .get("timestamp") + .and_then(|t| t.as_str()) + .unwrap_or("") + .to_string(); + + match event_type { + "tool_use" => { + let tool_id = json + .get("id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + let tool_name = json + .get("toolName") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let tool_args = json + .get("toolArgs") + .cloned() + .unwrap_or(serde_json::Value::Null); + + let input = parse_copilot_tool_input(&tool_name, &tool_args); + + let idx = invocations.len(); + invocations.push(ToolInvocation { + tool_name: tool_name.clone(), + input, + timestamp, + session_id: session_id.to_string(), + agent: AgentKind::CopilotCli, + result: None, + }); + + if !tool_id.is_empty() { + pending.insert(tool_id, idx); + } + } + "tool_result" => { + let tool_use_id = json + .get("toolUseId") + .and_then(|id| id.as_str()) + .unwrap_or(""); + + if let Some(&idx) = pending.get(tool_use_id) { + let result_content = json + .get("content") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + let result_type = json + .get("resultType") + .and_then(|r| r.as_str()) + .unwrap_or("success"); + let is_error = result_type == "error"; + + 
invocations[idx].result = Some(ToolResult { + content: result_content, + is_error, + }); + pending.remove(tool_use_id); + } + } + _ => {} // skip unknown event types + } + } + + Ok(invocations) +} + +/// Map Copilot CLI tool names to normalized ToolInput enum. +fn parse_copilot_tool_input(tool_name: &str, args: &serde_json::Value) -> ToolInput { + match tool_name { + "bash" => { + let command = args + .get("command") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Bash { command } + } + "readFile" => { + let file_path = args + .get("path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Read { file_path } + } + "writeFile" => { + let file_path = args + .get("path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Write { file_path } + } + "editFile" => { + let file_path = args + .get("path") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Edit { file_path } + } + _ => ToolInput::Other { + tool_name: tool_name.to_string(), + raw: args.clone(), + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // ---- JSONL parsing without YAML header ---- + + #[test] + fn test_parse_jsonl_without_yaml_header() { + let content = concat!( + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "cargo test"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-001", "resultType": "success", "content": "ok", "timestamp": "2024-06-15T10:01:05Z" }"#, + ); + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "bash"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "cargo test" + )); + assert!(invocations[0].result.is_some()); + assert_eq!(invocations[0].result.as_ref().unwrap().content, "ok"); + assert!(!invocations[0].result.as_ref().unwrap().is_error); + } + + // ---- JSONL 
parsing with YAML header ---- + + #[test] + fn test_parse_jsonl_with_yaml_header() { + let content = concat!( + "---\n", + "model: gpt-4o\n", + "session_start: \"2024-06-15T10:00:00Z\"\n", + "---\n", + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "ls"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + ); + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "bash"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "ls" + )); + } + + // ---- Tool result correlation ---- + + #[test] + fn test_correlate_tool_result() { + let content = concat!( + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "echo hi"}, "id": "t-010", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-010", "resultType": "success", "content": "hi", "timestamp": "2024-06-15T10:01:01Z" }"#, + ); + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.is_some()); + assert_eq!(invocations[0].result.as_ref().unwrap().content, "hi"); + assert!(!invocations[0].result.as_ref().unwrap().is_error); + } + + // ---- Error result type ---- + + #[test] + fn test_result_type_error() { + let content = concat!( + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "false"}, "id": "t-020", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-020", "resultType": "error", "content": "command failed", "timestamp": "2024-06-15T10:01:01Z" }"#, + ); + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.as_ref().unwrap().is_error); + assert_eq!( + invocations[0].result.as_ref().unwrap().content, + "command failed" + ); + } + + // ---- Skip malformed lines ---- + + #[test] + fn 
test_skip_malformed_lines() { + let content = "not json\n{}\n"; + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } + + // ---- Empty input ---- + + #[test] + fn test_empty_input() { + let invocations = parse_copilot_jsonl("", "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } + + // ---- Multiple tools ---- + + #[test] + fn test_multiple_tools() { + let content = concat!( + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "cargo test"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-001", "resultType": "success", "content": "ok", "timestamp": "2024-06-15T10:01:05Z" }"#, + "\n", + r#"{ "type": "tool_use", "toolName": "readFile", "toolArgs": {"path": "/tmp/main.rs"}, "id": "t-002", "timestamp": "2024-06-15T10:02:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-002", "resultType": "success", "content": "fn main() {}", "timestamp": "2024-06-15T10:02:01Z" }"#, + "\n", + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "git status"}, "id": "t-003", "timestamp": "2024-06-15T10:03:00Z" }"#, + ); + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 3); + + // First: bash with result + assert_eq!(invocations[0].tool_name, "bash"); + assert!(invocations[0].result.is_some()); + + // Second: readFile mapped to Read + assert_eq!(invocations[1].tool_name, "readFile"); + assert!( + matches!(&invocations[1].input, ToolInput::Read { file_path } if file_path == "/tmp/main.rs") + ); + assert!(invocations[1].result.is_some()); + + // Third: bash without result (no matching tool_result) + assert_eq!(invocations[2].tool_name, "bash"); + assert!(invocations[2].result.is_none()); + } + + // ---- YAML header skipping ---- + + #[test] + fn test_skip_yaml_header() { + let content = concat!( + "---\n", + "model: gpt-4o\n", + "session_start: 
\"2024-06-15T10:00:00Z\"\n", + "project: \"/home/user/myproject\"\n", + "---\n", + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "echo test"}, "id": "t-100", "timestamp": "2024-06-15T10:05:00Z" }"#, + ); + + let body = skip_yaml_header(content); + // Body should contain the JSONL events, not the YAML header + assert!(!body.is_empty()); + assert!(!body.contains("model: gpt-4o")); + + // Full parse from original content works + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + } + + // ---- Tool input mapping ---- + + #[test] + fn test_tool_input_bash() { + let args = serde_json::json!({"command": "cargo build"}); + let input = parse_copilot_tool_input("bash", &args); + assert!(matches!(input, ToolInput::Bash { command } if command == "cargo build")); + } + + #[test] + fn test_tool_input_read_file() { + let args = serde_json::json!({"path": "/tmp/test.rs"}); + let input = parse_copilot_tool_input("readFile", &args); + assert!(matches!(input, ToolInput::Read { file_path } if file_path == "/tmp/test.rs")); + } + + #[test] + fn test_tool_input_write_file() { + let args = serde_json::json!({"path": "/tmp/out.rs"}); + let input = parse_copilot_tool_input("writeFile", &args); + assert!(matches!(input, ToolInput::Write { file_path } if file_path == "/tmp/out.rs")); + } + + #[test] + fn test_tool_input_edit_file() { + let args = serde_json::json!({"path": "/tmp/edit.rs"}); + let input = parse_copilot_tool_input("editFile", &args); + assert!(matches!(input, ToolInput::Edit { file_path } if file_path == "/tmp/edit.rs")); + } + + #[test] + fn test_tool_input_unknown() { + let args = serde_json::json!({"foo": "bar"}); + let input = parse_copilot_tool_input("unknownTool", &args); + assert!(matches!(input, ToolInput::Other { tool_name, .. 
} if tool_name == "unknownTool")); + } + + // ---- Agent kind ---- + + #[test] + fn test_agent_kind_is_copilot() { + let content = r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "ls"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#; + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].agent, AgentKind::CopilotCli); + } + + // ---- Session ID propagation ---- + + #[test] + fn test_session_id_propagation() { + let content = r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "ls"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#; + let invocations = parse_copilot_jsonl(content, "my-session-42").unwrap(); + assert_eq!(invocations[0].session_id, "my-session-42"); + } + + // ---- Timestamp propagation ---- + + #[test] + fn test_timestamp_propagation() { + let content = r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "ls"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#; + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations[0].timestamp, "2024-06-15T10:01:00Z"); + } + + // ---- No closing YAML delimiter ---- + + #[test] + fn test_yaml_header_no_closing_delimiter() { + // If there's no closing `---`, treat entire content as JSONL + let content = concat!( + "---\n", + "model: gpt-4o\n", + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "ls"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#, + ); + let body = skip_yaml_header(content); + // Without closing delimiter, returns original content + assert_eq!(body, content); + + // Full parse should still attempt to parse lines (malformed YAML lines will be skipped) + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + // The `---` and `model:` lines are not valid JSON, so they get skipped. + // The tool_use line is valid JSON and should parse. 
+ assert_eq!(invocations.len(), 1); + } + + // ---- Uncorrelated result is ignored ---- + + #[test] + fn test_uncorrelated_result_ignored() { + let content = r#"{ "type": "tool_result", "toolUseId": "nonexistent", "resultType": "success", "content": "orphan", "timestamp": "2024-06-15T10:01:00Z" }"#; + let invocations = parse_copilot_jsonl(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } +} diff --git a/crates/rskim/src/cmd/session/cursor.rs b/crates/rskim/src/cmd/session/cursor.rs new file mode 100644 index 0000000..cac6a5e --- /dev/null +++ b/crates/rskim/src/cmd/session/cursor.rs @@ -0,0 +1,672 @@ +//! Cursor session provider. +//! +//! Parses Cursor's SQLite-backed session data from `state.vscdb`. +//! Cursor stores composer conversations in a `cursorDiskKV` table +//! with JSON-encoded values keyed by `composer.*`. + +use std::path::PathBuf; + +use super::types::*; +use super::SessionProvider; + +/// Maximum database file size: 100 MB. +const MAX_DB_SIZE: u64 = 100 * 1024 * 1024; + +/// Cursor session file provider. +/// +/// Reads from Cursor's `state.vscdb` SQLite database. Access is always +/// read-only with a 1-second busy timeout to avoid hanging when Cursor +/// has a write lock. +pub(crate) struct CursorProvider { + db_path: PathBuf, +} + +impl CursorProvider { + /// Detect Cursor by checking if the state database exists. + /// + /// Uses `SKIM_CURSOR_DB_PATH` env var override for testability. + pub(crate) fn detect() -> Option { + let db_path = if let Ok(override_path) = std::env::var("SKIM_CURSOR_DB_PATH") { + PathBuf::from(override_path) + } else { + default_db_path()? + }; + + if db_path.is_file() { + Some(Self { db_path }) + } else { + None + } + } +} + +/// Platform-specific default path for Cursor's state database. 
+fn default_db_path() -> Option { + #[cfg(target_os = "windows")] + { + // Windows uses a different base directory (AppData), not covered by config_dir() + dirs::data_dir().map(|d| d.join("Cursor/User/globalStorage/state.vscdb")) + } + + #[cfg(not(target_os = "windows"))] + { + dirs::home_dir().map(|h| { + AgentKind::Cursor + .config_dir(&h) + .join("User/globalStorage/state.vscdb") + }) + } +} + +impl SessionProvider for CursorProvider { + fn agent_kind(&self) -> AgentKind { + AgentKind::Cursor + } + + fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result> { + let rows = match query_composer_keys(&self.db_path) { + Ok(rows) => rows, + Err(e) => { + // Graceful degradation: if the database is locked or + // otherwise inaccessible, return empty rather than fail. + eprintln!("warning: could not query Cursor database: {e}"); + return Ok(Vec::new()); + } + }; + + let file_modified = std::fs::metadata(&self.db_path) + .and_then(|m| m.modified()) + .unwrap_or(std::time::SystemTime::now()); + + // Apply time filter against the database file's mtime (we cannot + // reliably get per-session timestamps from the KV table). 
+ if let Some(since) = filter.since { + if file_modified < since { + return Ok(Vec::new()); + } + } + + let mut sessions: Vec = rows + .into_iter() + .map(|(key, _value)| SessionFile { + path: self.db_path.clone(), + modified: file_modified, + agent: AgentKind::Cursor, + session_id: key, + }) + .collect(); + + // Sort by session_id for deterministic output + sessions.sort_by(|a, b| b.session_id.cmp(&a.session_id)); + + if filter.latest_only { + sessions.truncate(1); + } + + Ok(sessions) + } + + fn parse_session(&self, file: &SessionFile) -> anyhow::Result> { + // Guard against oversized databases (consistent with other providers) + let db_size = std::fs::metadata(&self.db_path)?.len(); + if db_size > MAX_DB_SIZE { + anyhow::bail!( + "database too large ({:.1} MB, limit {:.0} MB): {}", + db_size as f64 / (1024.0 * 1024.0), + MAX_DB_SIZE as f64 / (1024.0 * 1024.0), + self.db_path.display() + ); + } + + let value = match query_single_key(&self.db_path, &file.session_id) { + Ok(Some(v)) => v, + Ok(None) => return Ok(Vec::new()), + Err(e) => { + eprintln!( + "warning: could not read Cursor session {}: {e}", + file.session_id + ); + return Ok(Vec::new()); + } + }; + + parse_cursor_json_value(&value, &file.session_id) + } +} + +// ============================================================================ +// SQLite queries (thin layer) +// ============================================================================ + +/// Query all composer session keys and their values from the database. +/// +/// Opens read-only with a 1-second busy timeout. Uses a SQL LIMIT to +/// prevent unbounded reads on large databases. 
+fn query_composer_keys(db_path: &std::path::Path) -> anyhow::Result> { + let conn = rusqlite::Connection::open_with_flags( + db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + )?; + conn.busy_timeout(std::time::Duration::from_millis(1000))?; + + let mut stmt = + conn.prepare("SELECT key, value FROM cursorDiskKV WHERE key LIKE 'composer.%' LIMIT 1000")?; + let rows = stmt + .query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })? + .filter_map(|r| r.ok()) + .collect(); + + Ok(rows) +} + +/// Query a single key's value from the database. +fn query_single_key(db_path: &std::path::Path, key: &str) -> anyhow::Result> { + let conn = rusqlite::Connection::open_with_flags( + db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + )?; + conn.busy_timeout(std::time::Duration::from_millis(1000))?; + + let mut stmt = conn.prepare("SELECT value FROM cursorDiskKV WHERE key = ?1 LIMIT 1")?; + let result = stmt + .query_row(rusqlite::params![key], |row| row.get::<_, String>(0)) + .ok(); + + Ok(result) +} + +// ============================================================================ +// JSON parsing (business logic, fully testable without SQLite) +// ============================================================================ + +/// Parse a Cursor composer JSON value into tool invocations. +/// +/// The JSON structure has `composerData.conversations[].messages[]` +/// where assistant messages may contain `tool_calls` and tool messages +/// contain results correlated by `tool_call_id`. 
+pub(super) fn parse_cursor_json_value( + json_str: &str, + session_id: &str, +) -> anyhow::Result> { + let root: serde_json::Value = serde_json::from_str(json_str) + .map_err(|e| anyhow::anyhow!("invalid JSON in Cursor session: {e}"))?; + + let conversations = match root + .get("composerData") + .and_then(|cd| cd.get("conversations")) + .and_then(|c| c.as_array()) + { + Some(convs) => convs, + None => return Ok(Vec::new()), + }; + + let mut invocations = Vec::new(); + // Map from tool_call_id to index in invocations for result correlation + let mut pending: std::collections::HashMap = std::collections::HashMap::new(); + + for conversation in conversations { + let messages = match conversation.get("messages").and_then(|m| m.as_array()) { + Some(msgs) => msgs, + None => continue, + }; + + for message in messages { + let role = message.get("role").and_then(|r| r.as_str()).unwrap_or(""); + + match role { + "assistant" => { + if let Some(tool_calls) = message.get("tool_calls").and_then(|tc| tc.as_array()) + { + process_cursor_tool_calls( + tool_calls, + session_id, + &mut invocations, + &mut pending, + ); + } + } + "tool" => { + let tool_call_id = message + .get("tool_call_id") + .and_then(|id| id.as_str()) + .unwrap_or(""); + + if let Some(&idx) = pending.get(tool_call_id) { + let content = message + .get("content") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + + invocations[idx].result = Some(ToolResult { + content, + is_error: false, + }); + pending.remove(tool_call_id); + } + } + _ => {} + } + } + } + + Ok(invocations) +} + +/// Extract tool invocations from Cursor's `tool_calls` array. +/// +/// Each tool call has `type: "function"`, a `function` object with `name` +/// and `arguments` (JSON-encoded string), and an `id` for result correlation. 
+fn process_cursor_tool_calls( + tool_calls: &[serde_json::Value], + session_id: &str, + invocations: &mut Vec, + pending: &mut std::collections::HashMap, +) { + for tool_call in tool_calls { + let tc_type = tool_call.get("type").and_then(|t| t.as_str()).unwrap_or(""); + if tc_type != "function" { + continue; + } + + let function = match tool_call.get("function") { + Some(f) => f, + None => continue, + }; + + let tool_name = function + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + + let arguments_str = function + .get("arguments") + .and_then(|a| a.as_str()) + .unwrap_or("{}"); + + let arguments: serde_json::Value = serde_json::from_str(arguments_str).unwrap_or_default(); + + let input = map_cursor_tool(&tool_name, &arguments); + + let tc_id = tool_call + .get("id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + + let idx = invocations.len(); + invocations.push(ToolInvocation { + tool_name: tool_name.clone(), + input, + timestamp: String::new(), + session_id: session_id.to_string(), + agent: AgentKind::Cursor, + result: None, + }); + + if !tc_id.is_empty() { + pending.insert(tc_id, idx); + } + } +} + +/// Map Cursor tool names to normalized ToolInput variants. 
+fn map_cursor_tool(tool_name: &str, arguments: &serde_json::Value) -> ToolInput { + match tool_name { + "run_terminal_command" => { + let command = arguments + .get("command") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Bash { command } + } + "read_file" => { + let file_path = arguments + .get("file_path") + .or_else(|| arguments.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Read { file_path } + } + "write_file" => { + let file_path = arguments + .get("file_path") + .or_else(|| arguments.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Write { file_path } + } + "edit_file" => { + let file_path = arguments + .get("file_path") + .or_else(|| arguments.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Edit { file_path } + } + _ => ToolInput::Other { + tool_name: tool_name.to_string(), + raw: arguments.clone(), + }, + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + // ---- JSON parsing tests (no SQLite needed) ---- + + fn sample_json() -> &'static str { + r#"{ + "composerData": { + "conversations": [{ + "id": "conv-001", + "messages": [ + { + "role": "assistant", + "tool_calls": [{ + "id": "tc-001", + "type": "function", + "function": { + "name": "run_terminal_command", + "arguments": "{\"command\":\"cargo test\"}" + } + }] + }, + { + "role": "tool", + "tool_call_id": "tc-001", + "content": "test result: ok" + } + ] + }] + } + }"# + } + + #[test] + fn test_parse_cursor_json_value() { + let invocations = parse_cursor_json_value(sample_json(), "sess-1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "run_terminal_command"); + assert_eq!(invocations[0].agent, AgentKind::Cursor); + 
assert_eq!(invocations[0].session_id, "sess-1"); + } + + #[test] + fn test_map_run_terminal_command_to_bash() { + let args = serde_json::json!({"command": "cargo test --nocapture"}); + let input = map_cursor_tool("run_terminal_command", &args); + assert!(matches!( + &input, + ToolInput::Bash { command } if command == "cargo test --nocapture" + )); + } + + #[test] + fn test_map_read_file_to_read() { + let args = serde_json::json!({"file_path": "/tmp/src/main.rs"}); + let input = map_cursor_tool("read_file", &args); + assert!(matches!( + &input, + ToolInput::Read { file_path } if file_path == "/tmp/src/main.rs" + )); + + // Also supports "path" key variant + let args_alt = serde_json::json!({"path": "/tmp/alt.rs"}); + let input_alt = map_cursor_tool("read_file", &args_alt); + assert!(matches!( + &input_alt, + ToolInput::Read { file_path } if file_path == "/tmp/alt.rs" + )); + } + + #[test] + fn test_map_write_file_to_write() { + let args = serde_json::json!({"file_path": "/tmp/out.rs"}); + let input = map_cursor_tool("write_file", &args); + assert!(matches!( + &input, + ToolInput::Write { file_path } if file_path == "/tmp/out.rs" + )); + } + + #[test] + fn test_map_edit_file_to_edit() { + let args = serde_json::json!({"file_path": "/tmp/edit.rs"}); + let input = map_cursor_tool("edit_file", &args); + assert!(matches!( + &input, + ToolInput::Edit { file_path } if file_path == "/tmp/edit.rs" + )); + } + + #[test] + fn test_map_unknown_tool_to_other() { + let args = serde_json::json!({"foo": "bar"}); + let input = map_cursor_tool("custom_tool", &args); + assert!(matches!( + &input, + ToolInput::Other { tool_name, .. 
} if tool_name == "custom_tool" + )); + } + + #[test] + fn test_correlate_tool_result() { + let invocations = parse_cursor_json_value(sample_json(), "sess-1").unwrap(); + assert_eq!(invocations.len(), 1); + let result = invocations[0].result.as_ref().expect("should have result"); + assert_eq!(result.content, "test result: ok"); + assert!(!result.is_error); + } + + #[test] + fn test_empty_conversations() { + let json = r#"{"composerData": {"conversations": []}}"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_missing_composer_data() { + let json = r#"{"otherKey": "value"}"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_malformed_json_graceful() { + let result = parse_cursor_json_value("not valid json {{{", "sess-1"); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("invalid JSON")); + } + + #[test] + fn test_malformed_arguments_graceful() { + // Arguments is not valid JSON -- should default to empty object + let json = r#"{ + "composerData": { + "conversations": [{ + "id": "conv-001", + "messages": [{ + "role": "assistant", + "tool_calls": [{ + "id": "tc-001", + "type": "function", + "function": { + "name": "run_terminal_command", + "arguments": "not valid json" + } + }] + }] + }] + } + }"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert_eq!(invocations.len(), 1); + // Should produce Bash with empty command (arguments parsed as null) + assert!(matches!(&invocations[0].input, ToolInput::Bash { command } if command.is_empty())); + } + + #[test] + fn test_multiple_tool_calls_in_message() { + let json = r#"{ + "composerData": { + "conversations": [{ + "id": "conv-001", + "messages": [{ + "role": "assistant", + "tool_calls": [ + { + "id": "tc-001", + "type": "function", + "function": { + "name": "read_file", + "arguments": 
"{\"file_path\":\"/a.rs\"}" + } + }, + { + "id": "tc-002", + "type": "function", + "function": { + "name": "read_file", + "arguments": "{\"file_path\":\"/b.rs\"}" + } + } + ] + }] + }] + } + }"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert_eq!(invocations.len(), 2); + } + + #[test] + fn test_multiple_conversations() { + let json = r#"{ + "composerData": { + "conversations": [ + { + "id": "conv-001", + "messages": [{ + "role": "assistant", + "tool_calls": [{ + "id": "tc-001", + "type": "function", + "function": { + "name": "run_terminal_command", + "arguments": "{\"command\":\"cargo build\"}" + } + }] + }] + }, + { + "id": "conv-002", + "messages": [{ + "role": "assistant", + "tool_calls": [{ + "id": "tc-002", + "type": "function", + "function": { + "name": "run_terminal_command", + "arguments": "{\"command\":\"cargo test\"}" + } + }] + }] + } + ] + } + }"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert_eq!(invocations.len(), 2); + } + + #[test] + fn test_platform_path_detection() { + // Verify default_db_path returns a path (platform-specific) + let path = default_db_path(); + // On CI or containers without a home dir this may be None, which is fine + if let Some(p) = path { + let path_str = p.to_string_lossy(); + #[cfg(target_os = "macos")] + assert!( + path_str.contains("Library/Application Support/Cursor"), + "macOS path should contain Cursor app support dir, got: {path_str}" + ); + #[cfg(target_os = "linux")] + assert!( + path_str.contains(".config/Cursor"), + "Linux path should contain .config/Cursor, got: {path_str}" + ); + } + } + + #[test] + fn test_env_override_path() { + // Use a temp path that does not exist -- detect() should return None + std::env::set_var("SKIM_CURSOR_DB_PATH", "/tmp/nonexistent_skim_test.vscdb"); + let provider = CursorProvider::detect(); + assert!( + provider.is_none(), + "detect() should return None for non-existent file" + ); + 
std::env::remove_var("SKIM_CURSOR_DB_PATH"); + } + + #[test] + fn test_non_function_tool_calls_skipped() { + let json = r#"{ + "composerData": { + "conversations": [{ + "id": "conv-001", + "messages": [{ + "role": "assistant", + "tool_calls": [{ + "id": "tc-001", + "type": "code_interpreter", + "function": { + "name": "run_terminal_command", + "arguments": "{\"command\":\"ls\"}" + } + }] + }] + }] + } + }"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert!( + invocations.is_empty(), + "non-function tool calls should be skipped" + ); + } + + #[test] + fn test_message_without_tool_calls() { + let json = r#"{ + "composerData": { + "conversations": [{ + "id": "conv-001", + "messages": [{ + "role": "assistant", + "content": "Here is the answer" + }] + }] + } + }"#; + let invocations = parse_cursor_json_value(json, "sess-1").unwrap(); + assert!(invocations.is_empty()); + } +} diff --git a/crates/rskim/src/cmd/session/gemini.rs b/crates/rskim/src/cmd/session/gemini.rs new file mode 100644 index 0000000..ac5da71 --- /dev/null +++ b/crates/rskim/src/cmd/session/gemini.rs @@ -0,0 +1,512 @@ +//! Gemini CLI session provider +//! +//! Parses Gemini CLI session files from `~/.gemini/tmp/`. +//! Supports dual format: legacy JSON array and current JSONL. + +use std::collections::HashMap; +use std::path::PathBuf; + +use super::types::*; +use super::SessionProvider; + +/// Maximum session file size (100 MB) to prevent unbounded reads. +const MAX_SESSION_SIZE: u64 = 100 * 1024 * 1024; + +/// Gemini CLI session file provider. +pub(crate) struct GeminiCliProvider { + gemini_dir: PathBuf, +} + +impl GeminiCliProvider { + /// Detect Gemini CLI by checking if the session directory exists. + /// + /// Uses `SKIM_GEMINI_DIR` env var override for testability. 
+ pub(crate) fn detect() -> Option { + let gemini_dir = if let Ok(override_dir) = std::env::var("SKIM_GEMINI_DIR") { + PathBuf::from(override_dir) + } else { + AgentKind::GeminiCli + .config_dir(&dirs::home_dir()?) + .join("tmp") + }; + + if gemini_dir.is_dir() { + Some(Self { gemini_dir }) + } else { + None + } + } +} + +impl SessionProvider for GeminiCliProvider { + fn agent_kind(&self) -> AgentKind { + AgentKind::GeminiCli + } + + fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result> { + let mut sessions = Vec::new(); + + // Canonicalize gemini_dir to prevent symlink traversal outside boundary + let canonical_root = self + .gemini_dir + .canonicalize() + .unwrap_or_else(|_| self.gemini_dir.clone()); + + let entries = std::fs::read_dir(&self.gemini_dir)?; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("jsonl") { + continue; + } + + // Verify resolved path stays within the gemini directory (symlink traversal guard) + if let Ok(canonical_path) = path.canonicalize() { + if !canonical_path.starts_with(&canonical_root) { + eprintln!( + "warning: skipping file outside gemini dir: {}", + path.display() + ); + continue; + } + } + + let modified = match std::fs::metadata(&path).and_then(|m| m.modified()) { + Ok(t) => t, + Err(e) => { + eprintln!( + "warning: could not read metadata for {}: {}", + path.display(), + e + ); + continue; + } + }; + + // Apply time filter + if let Some(since) = filter.since { + if modified < since { + continue; + } + } + + let session_id = path + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + + sessions.push(SessionFile { + path, + modified, + agent: AgentKind::GeminiCli, + session_id, + }); + } + + // Sort by modification time (newest first) + sessions.sort_by(|a, b| b.modified.cmp(&a.modified)); + + // Apply latest_only filter + if filter.latest_only { + sessions.truncate(1); + } + + Ok(sessions) + } + + fn parse_session(&self, 
file: &SessionFile) -> anyhow::Result> { + // Guard against unbounded reads -- reject files over 100 MB + let file_size = std::fs::metadata(&file.path)?.len(); + if file_size > MAX_SESSION_SIZE { + anyhow::bail!( + "session file too large ({:.1} MB, limit {:.0} MB): {}", + file_size as f64 / (1024.0 * 1024.0), + MAX_SESSION_SIZE as f64 / (1024.0 * 1024.0), + file.path.display() + ); + } + + let content = std::fs::read_to_string(&file.path)?; + parse_gemini_session(&content, &file.session_id) + } +} + +/// Detect format by first non-whitespace character and parse accordingly. +/// +/// - First char `[` -> JSON array of messages (legacy format) +/// - Otherwise -> JSONL (one JSON object per line, current format) +fn parse_gemini_session(content: &str, session_id: &str) -> anyhow::Result> { + let trimmed = content.trim_start(); + if trimmed.starts_with('[') { + parse_json_array_format(trimmed, session_id) + } else { + parse_jsonl_format(content, session_id) + } +} + +/// Parse Gemini CLI JSONL format (one JSON object per line). +/// +/// Correlates tool_use events with tool_result events by matching +/// `id` to `tool_use_id`. +fn parse_jsonl_format(content: &str, session_id: &str) -> anyhow::Result> { + let mut invocations = Vec::new(); + let mut pending: HashMap = HashMap::new(); + + for line in content.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + let json: serde_json::Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(_) => continue, // skip malformed lines + }; + + process_gemini_event(&json, session_id, &mut invocations, &mut pending); + } + + Ok(invocations) +} + +/// Parse Gemini CLI JSON array format (legacy). +/// +/// The file contains a single JSON array of message objects. 
+fn parse_json_array_format(content: &str, session_id: &str) -> anyhow::Result> { + let arr: Vec = serde_json::from_str(content)?; + let mut invocations = Vec::new(); + let mut pending: HashMap = HashMap::new(); + + for json in &arr { + process_gemini_event(json, session_id, &mut invocations, &mut pending); + } + + Ok(invocations) +} + +/// Process a single Gemini event (tool_use or tool_result). +/// +/// Gemini CLI events have a top-level "type" field: +/// - `{ "type": "tool_use", "tool": "shell", "args": {"command": "..."}, "id": "tu-001" }` +/// - `{ "type": "tool_result", "tool_use_id": "tu-001", "content": "...", "is_error": false }` +fn process_gemini_event( + json: &serde_json::Value, + session_id: &str, + invocations: &mut Vec, + pending: &mut HashMap, +) { + let event_type = json.get("type").and_then(|t| t.as_str()).unwrap_or(""); + + match event_type { + "tool_use" => { + let tool_name = json + .get("tool") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + let tool_id = json + .get("id") + .and_then(|id| id.as_str()) + .unwrap_or("") + .to_string(); + let args_json = json.get("args").cloned().unwrap_or(serde_json::Value::Null); + + let input = map_gemini_tool_input(&tool_name, &args_json); + + let idx = invocations.len(); + invocations.push(ToolInvocation { + tool_name: tool_name.clone(), + input, + timestamp: String::new(), + session_id: session_id.to_string(), + agent: AgentKind::GeminiCli, + result: None, + }); + + if !tool_id.is_empty() { + pending.insert(tool_id, idx); + } + } + "tool_result" => { + let tool_use_id = json + .get("tool_use_id") + .and_then(|id| id.as_str()) + .unwrap_or(""); + + if let Some(&idx) = pending.get(tool_use_id) { + let result_content = match json.get("content") { + Some(serde_json::Value::String(s)) => s.clone(), + Some(serde_json::Value::Array(arr)) => arr + .iter() + .filter_map(|b| b.get("text").and_then(|t| t.as_str())) + .collect::>() + .join("\n"), + _ => String::new(), + }; + let is_error = json + 
.get("is_error") + .and_then(|e| e.as_bool()) + .unwrap_or(false); + + invocations[idx].result = Some(ToolResult { + content: result_content, + is_error, + }); + pending.remove(tool_use_id); + } + } + _ => {} // skip unknown event types + } +} + +/// Map Gemini CLI tool names to normalized ToolInput enum. +/// +/// Tool name mapping: +/// - "shell" / "bash" -> ToolInput::Bash +/// - "read_file" -> ToolInput::Read +/// - "write_file" -> ToolInput::Write +/// - "edit_file" -> ToolInput::Edit +/// - Everything else -> ToolInput::Other +fn map_gemini_tool_input(tool_name: &str, args: &serde_json::Value) -> ToolInput { + match tool_name { + "shell" | "bash" => { + let command = args + .get("command") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Bash { command } + } + "read_file" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Read { file_path } + } + "write_file" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Write { file_path } + } + "edit_file" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Edit { file_path } + } + _ => ToolInput::Other { + tool_name: tool_name.to_string(), + raw: args.clone(), + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_jsonl_format() { + let content = concat!( + r#"{"type":"tool_use","tool":"shell","args":{"command":"cargo test"},"id":"tu-001"}"#, + "\n", + r#"{"type":"tool_result","tool_use_id":"tu-001","content":"test result: ok","is_error":false}"#, + ); + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "shell"); + assert!(matches!( + &invocations[0].input, + 
ToolInput::Bash { command } if command == "cargo test" + )); + assert!(invocations[0].result.is_some()); + assert_eq!( + invocations[0].result.as_ref().unwrap().content, + "test result: ok" + ); + assert!(!invocations[0].result.as_ref().unwrap().is_error); + } + + #[test] + fn test_parse_json_array_format() { + let content = r#"[ + {"type":"tool_use","tool":"shell","args":{"command":"ls -la"},"id":"tu-001"}, + {"type":"tool_result","tool_use_id":"tu-001","content":"total 0\ndrwxr-xr-x","is_error":false} + ]"#; + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "shell"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "ls -la" + )); + assert!(invocations[0].result.is_some()); + assert_eq!( + invocations[0].result.as_ref().unwrap().content, + "total 0\ndrwxr-xr-x" + ); + } + + #[test] + fn test_detect_format_by_first_char() { + // JSON array format (starts with [) + let array_content = + r#"[{"type":"tool_use","tool":"shell","args":{"command":"echo hi"},"id":"tu-001"}]"#; + let invocations = parse_gemini_session(array_content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + + // JSONL format (starts with {) + let jsonl_content = + r#"{"type":"tool_use","tool":"shell","args":{"command":"echo hi"},"id":"tu-002"}"#; + let invocations = parse_gemini_session(jsonl_content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + + // Leading whitespace before [ should still detect array format + let padded_array = format!( + " \n {}", + r#"[{"type":"tool_use","tool":"shell","args":{"command":"echo"},"id":"tu-003"}]"# + ); + let invocations = parse_gemini_session(&padded_array, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + } + + #[test] + fn test_correlate_tool_result() { + let content = concat!( + r#"{"type":"tool_use","tool":"read_file","args":{"file_path":"/tmp/test.rs"},"id":"tu-001"}"#, + "\n", + 
r#"{"type":"tool_result","tool_use_id":"tu-001","content":"fn main() {}"}"#, + ); + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.is_some()); + assert_eq!( + invocations[0].result.as_ref().unwrap().content, + "fn main() {}" + ); + assert!(!invocations[0].result.as_ref().unwrap().is_error); + } + + #[test] + fn test_skip_malformed_lines() { + let content = "not json\n{}\n"; + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } + + #[test] + fn test_empty_input() { + let invocations = parse_gemini_session("", "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } + + #[test] + fn test_tool_result_with_error() { + let content = concat!( + r#"{"type":"tool_use","tool":"shell","args":{"command":"rm /protected"},"id":"tu-001"}"#, + "\n", + r#"{"type":"tool_result","tool_use_id":"tu-001","content":"permission denied","is_error":true}"#, + ); + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.as_ref().unwrap().is_error); + assert_eq!( + invocations[0].result.as_ref().unwrap().content, + "permission denied" + ); + } + + #[test] + fn test_multiple_tools() { + let content = concat!( + r#"{"type":"tool_use","tool":"shell","args":{"command":"cargo test"},"id":"tu-001"}"#, + "\n", + r#"{"type":"tool_result","tool_use_id":"tu-001","content":"ok","is_error":false}"#, + "\n", + r#"{"type":"tool_use","tool":"read_file","args":{"file_path":"/src/main.rs"},"id":"tu-002"}"#, + "\n", + r#"{"type":"tool_result","tool_use_id":"tu-002","content":"fn main() {}","is_error":false}"#, + "\n", + r#"{"type":"tool_use","tool":"write_file","args":{"file_path":"/tmp/out.rs"},"id":"tu-003"}"#, + ); + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 3); + + // First: shell command + assert_eq!(invocations[0].tool_name, 
"shell"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "cargo test" + )); + assert!(invocations[0].result.is_some()); + + // Second: read_file + assert_eq!(invocations[1].tool_name, "read_file"); + assert!(matches!( + &invocations[1].input, + ToolInput::Read { file_path } if file_path == "/src/main.rs" + )); + assert!(invocations[1].result.is_some()); + + // Third: write_file (no result yet) + assert_eq!(invocations[2].tool_name, "write_file"); + assert!(matches!( + &invocations[2].input, + ToolInput::Write { file_path } if file_path == "/tmp/out.rs" + )); + assert!(invocations[2].result.is_none()); + } + + #[test] + fn test_tool_name_mapping() { + // "bash" maps to ToolInput::Bash + let input = map_gemini_tool_input("bash", &serde_json::json!({"command": "echo hi"})); + assert!(matches!(input, ToolInput::Bash { command } if command == "echo hi")); + + // "shell" maps to ToolInput::Bash + let input = map_gemini_tool_input("shell", &serde_json::json!({"command": "ls"})); + assert!(matches!(input, ToolInput::Bash { command } if command == "ls")); + + // "read_file" maps to ToolInput::Read + let input = map_gemini_tool_input("read_file", &serde_json::json!({"file_path": "/a.rs"})); + assert!(matches!(input, ToolInput::Read { file_path } if file_path == "/a.rs")); + + // "read_file" with "path" key also works + let input = map_gemini_tool_input("read_file", &serde_json::json!({"path": "/b.rs"})); + assert!(matches!(input, ToolInput::Read { file_path } if file_path == "/b.rs")); + + // "edit_file" maps to ToolInput::Edit + let input = map_gemini_tool_input("edit_file", &serde_json::json!({"file_path": "/c.rs"})); + assert!(matches!(input, ToolInput::Edit { file_path } if file_path == "/c.rs")); + + // Unknown tools map to ToolInput::Other + let input = map_gemini_tool_input("search", &serde_json::json!({"query": "test"})); + assert!(matches!(input, ToolInput::Other { tool_name, .. 
} if tool_name == "search")); + } + + #[test] + fn test_agent_kind_is_gemini() { + let content = + r#"{"type":"tool_use","tool":"shell","args":{"command":"echo"},"id":"tu-001"}"#; + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations[0].agent, AgentKind::GeminiCli); + } + + #[test] + fn test_uncorrelated_result_ignored() { + // tool_result with no matching tool_use should be silently ignored + let content = r#"{"type":"tool_result","tool_use_id":"nonexistent","content":"orphan","is_error":false}"#; + let invocations = parse_gemini_session(content, "sess1").unwrap(); + assert_eq!(invocations.len(), 0); + } +} diff --git a/crates/rskim/src/cmd/session/mod.rs b/crates/rskim/src/cmd/session/mod.rs index 915e16b..03ea9a2 100644 --- a/crates/rskim/src/cmd/session/mod.rs +++ b/crates/rskim/src/cmd/session/mod.rs @@ -1,10 +1,16 @@ //! Multi-agent session infrastructure (#61) //! //! Provides agent-agnostic types and the `SessionProvider` trait for scanning -//! AI agent session files. Wave 4 ships the Claude Code provider; future agents -//! are added by implementing the trait -- no conditionals in business logic. +//! AI agent session files. Six providers ship today (Claude Code, Codex CLI, +//! Copilot CLI, Cursor, Gemini CLI, OpenCode); new agents are added by +//! implementing the trait -- no conditionals in business logic. mod claude; +mod codex; +mod copilot; +mod cursor; +mod gemini; +mod opencode; pub(crate) mod types; #[allow(unused_imports)] // ToolResult used by learn.rs tests @@ -20,6 +26,7 @@ pub(crate) use types::{ /// /// Each agent stores session data differently. Providers normalize /// tool invocations into agent-agnostic `ToolInvocation` structs. 
+#[allow(dead_code)] // agent_kind used in tests only; detect_single routes by AgentKind directly pub(crate) trait SessionProvider { fn agent_kind(&self) -> AgentKind; fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result>; @@ -32,33 +39,60 @@ pub(crate) trait SessionProvider { /// Auto-detect available agents by checking known session paths. /// -/// Uses `SKIM_PROJECTS_DIR` env var override for testability (same -/// pattern as `CLAUDE_CONFIG_DIR` in init.rs). +/// Individual providers accept `SKIM_*` env-var overrides (e.g. +/// `SKIM_PROJECTS_DIR`, `SKIM_CURSOR_DB_PATH`) so integration tests +/// can redirect detection to fixture directories. pub(crate) fn detect_agents() -> Vec> { let mut providers: Vec> = Vec::new(); if let Some(p) = claude::ClaudeCodeProvider::detect() { providers.push(Box::new(p)); } - // Future: if let Some(p) = CopilotProvider::detect() { ... } + if let Some(p) = codex::CodexCliProvider::detect() { + providers.push(Box::new(p)); + } + if let Some(p) = copilot::CopilotCliProvider::detect() { + providers.push(Box::new(p)); + } + if let Some(p) = cursor::CursorProvider::detect() { + providers.push(Box::new(p)); + } + if let Some(p) = gemini::GeminiCliProvider::detect() { + providers.push(Box::new(p)); + } + if let Some(p) = opencode::OpenCodeProvider::detect() { + providers.push(Box::new(p)); + } providers } +/// Detect the single provider for a specific agent kind. +/// +/// Short-circuits to only probe the requested agent's session path instead of +/// detecting all providers and filtering. 
+fn detect_single(kind: AgentKind) -> Vec> { + let opt: Option> = match kind { + AgentKind::ClaudeCode => claude::ClaudeCodeProvider::detect().map(|p| Box::new(p) as _), + AgentKind::CodexCli => codex::CodexCliProvider::detect().map(|p| Box::new(p) as _), + AgentKind::CopilotCli => copilot::CopilotCliProvider::detect().map(|p| Box::new(p) as _), + AgentKind::Cursor => cursor::CursorProvider::detect().map(|p| Box::new(p) as _), + AgentKind::GeminiCli => gemini::GeminiCliProvider::detect().map(|p| Box::new(p) as _), + AgentKind::OpenCode => opencode::OpenCodeProvider::detect().map(|p| Box::new(p) as _), + }; + opt.into_iter().collect() +} + /// Get providers filtered by agent kind, or all detected agents. pub(crate) fn get_providers(agent_filter: Option) -> Vec> { match agent_filter { - Some(kind) => { - let all = detect_agents(); - all.into_iter().filter(|p| p.agent_kind() == kind).collect() - } + Some(kind) => detect_single(kind), None => detect_agents(), } } /// Collect all tool invocations from the given providers within a time filter. /// -/// Iterates every provider, finds matching sessions, parses each one, and -/// flattens the results into a single `Vec`. Parse failures -/// are logged as warnings to stderr and skipped. +/// Deduplicates invocations across agents using (input_key, timestamp) pairs. +/// This prevents double-counting when multiple agents observe the same command. pub(crate) fn collect_invocations( providers: &[Box], filter: &TimeFilter, @@ -79,5 +113,135 @@ pub(crate) fn collect_invocations( } } } + + // Skip dedup when a single provider is active -- cross-agent overlap + // is impossible and we avoid allocating a HashSet key per invocation. + if providers.len() > 1 { + dedup_invocations(&mut all_invocations); + } Ok(all_invocations) } + +/// Deduplicate invocations by (input_key, timestamp). +/// +/// When multiple agents observe the same command at the same time, +/// only the first occurrence is retained. Order is preserved. 
+fn dedup_invocations(invocations: &mut Vec) { + let mut seen = std::collections::HashSet::new(); + invocations.retain(|inv| { + let key = (tool_input_key(&inv.input), inv.timestamp.clone()); + seen.insert(key) + }); +} + +/// Extract a string key from a ToolInput for deduplication. +fn tool_input_key(input: &ToolInput) -> String { + match input { + ToolInput::Read { file_path } => format!("read:{file_path}"), + ToolInput::Bash { command } => format!("bash:{command}"), + ToolInput::Write { file_path } => format!("write:{file_path}"), + ToolInput::Glob { pattern } => format!("glob:{pattern}"), + ToolInput::Grep { pattern } => format!("grep:{pattern}"), + ToolInput::Edit { file_path } => format!("edit:{file_path}"), + ToolInput::Other { tool_name, raw } => { + // Use serde_json::to_string for canonical JSON representation + // rather than Display, which is equivalent today but not guaranteed. + let raw_str = serde_json::to_string(raw).unwrap_or_default(); + format!("other:{tool_name}:{raw_str}") + } + } +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + fn make_invocation(command: &str, timestamp: &str, agent: AgentKind) -> ToolInvocation { + ToolInvocation { + tool_name: "Bash".to_string(), + input: ToolInput::Bash { + command: command.to_string(), + }, + timestamp: timestamp.to_string(), + session_id: "test-session".to_string(), + agent, + result: None, + } + } + + #[test] + fn test_dedup_same_command_same_timestamp() { + let mut invocations = vec![ + make_invocation("cargo test", "2026-01-01T00:00:00Z", AgentKind::ClaudeCode), + make_invocation("cargo test", "2026-01-01T00:00:00Z", AgentKind::GeminiCli), + ]; + dedup_invocations(&mut invocations); + assert_eq!(invocations.len(), 1, "same cmd+ts should dedup to 1"); + assert_eq!( + invocations[0].agent, + AgentKind::ClaudeCode, + "first 
occurrence should be retained" + ); + } + + #[test] + fn test_dedup_same_command_different_timestamp() { + let mut invocations = vec![ + make_invocation("cargo test", "2026-01-01T00:00:00Z", AgentKind::ClaudeCode), + make_invocation("cargo test", "2026-01-01T00:01:00Z", AgentKind::GeminiCli), + ]; + dedup_invocations(&mut invocations); + assert_eq!( + invocations.len(), + 2, + "same cmd but different ts should be preserved" + ); + } + + #[test] + fn test_dedup_different_commands_same_timestamp() { + let mut invocations = vec![ + make_invocation("cargo test", "2026-01-01T00:00:00Z", AgentKind::ClaudeCode), + make_invocation("cargo build", "2026-01-01T00:00:00Z", AgentKind::ClaudeCode), + ]; + dedup_invocations(&mut invocations); + assert_eq!( + invocations.len(), + 2, + "different commands should be preserved" + ); + } + + #[test] + fn test_dedup_empty_list() { + let mut invocations: Vec = Vec::new(); + dedup_invocations(&mut invocations); + assert!(invocations.is_empty()); + } + + #[test] + fn test_tool_input_key_variants() { + assert_eq!( + tool_input_key(&ToolInput::Bash { + command: "cargo test".to_string() + }), + "bash:cargo test" + ); + assert_eq!( + tool_input_key(&ToolInput::Read { + file_path: "/tmp/test.rs".to_string() + }), + "read:/tmp/test.rs" + ); + assert_eq!( + tool_input_key(&ToolInput::Write { + file_path: "/tmp/out.rs".to_string() + }), + "write:/tmp/out.rs" + ); + } +} diff --git a/crates/rskim/src/cmd/session/opencode.rs b/crates/rskim/src/cmd/session/opencode.rs new file mode 100644 index 0000000..5d274e8 --- /dev/null +++ b/crates/rskim/src/cmd/session/opencode.rs @@ -0,0 +1,738 @@ +//! OpenCode session provider +//! +//! Parses OpenCode SQLite session database from `.opencode/` directory. +//! OpenCode stores conversations and messages in a SQLite database with +//! tool_calls encoded as JSON in message rows. 
+ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use super::types::*; +use super::SessionProvider; + +/// Maximum SQLite database size (500 MB) to prevent unbounded reads. +/// +/// SQLite databases are larger than JSON session files, so the limit is +/// higher than the 100 MB used by JSON-based providers. +const MAX_SESSION_SIZE: u64 = 500 * 1024 * 1024; + +/// OpenCode session provider. +/// +/// Reads from `.opencode/` directory containing a SQLite database with +/// `conversations` and `messages` tables. +pub(crate) struct OpenCodeProvider { + db_path: PathBuf, +} + +impl OpenCodeProvider { + /// Detect OpenCode by walking up from cwd looking for `.opencode/` directory. + /// + /// Uses `SKIM_OPENCODE_DIR` env var override for testability. + pub(crate) fn detect() -> Option { + let opencode_dir = if let Ok(override_dir) = std::env::var("SKIM_OPENCODE_DIR") { + PathBuf::from(override_dir) + } else { + walk_up_for_opencode()? + }; + + find_sqlite_db(&opencode_dir).map(|db_path| Self { db_path }) + } +} + +/// Walk up from cwd looking for `.opencode/` directory. +fn walk_up_for_opencode() -> Option { + let mut current = std::env::current_dir().ok()?; + loop { + let candidate = current.join(".opencode"); + if candidate.is_dir() { + return Some(candidate); + } + if !current.pop() { + return None; + } + } +} + +/// Find a SQLite database file inside the given directory. +/// +/// Looks for `.db` or `.sqlite` files; returns the first match. 
+fn find_sqlite_db(dir: &Path) -> Option { + let entries = std::fs::read_dir(dir).ok()?; + for entry in entries.flatten() { + let path = entry.path(); + if path.is_file() { + if let Some(ext) = path.extension().and_then(|e| e.to_str()) { + if ext == "db" || ext == "sqlite" || ext == "sqlite3" { + return Some(path); + } + } + } + } + None +} + +impl SessionProvider for OpenCodeProvider { + fn agent_kind(&self) -> AgentKind { + AgentKind::OpenCode + } + + fn find_sessions(&self, filter: &TimeFilter) -> anyhow::Result> { + let conn = rusqlite::Connection::open_with_flags( + &self.db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + )?; + conn.busy_timeout(std::time::Duration::from_millis(1000))?; + + let mut stmt = conn.prepare( + "SELECT id, title, created_at, updated_at \ + FROM conversations \ + ORDER BY updated_at DESC \ + LIMIT 100", + )?; + + let rows = stmt.query_map([], |row| { + Ok(ConversationRow { + id: row.get(0)?, + _title: row.get::<_, Option>(1)?, + _created_at: row.get::<_, Option>(2)?, + updated_at: row.get::<_, Option>(3)?, + }) + })?; + + let mut sessions = Vec::new(); + for row in rows { + let conv = match row { + Ok(c) => c, + Err(_) => continue, + }; + + // Parse updated_at to SystemTime for filtering + let modified = parse_iso_timestamp(conv.updated_at.as_deref().unwrap_or("")) + .unwrap_or(std::time::SystemTime::UNIX_EPOCH); + + // Apply time filter + if let Some(since) = filter.since { + if modified < since { + continue; + } + } + + sessions.push(SessionFile { + path: self.db_path.clone(), + modified, + agent: AgentKind::OpenCode, + session_id: conv.id, + }); + } + + // Sort by modification time (newest first) + sessions.sort_by(|a, b| b.modified.cmp(&a.modified)); + + // Apply latest_only filter + if filter.latest_only { + sessions.truncate(1); + } + + Ok(sessions) + } + + fn parse_session(&self, file: &SessionFile) -> anyhow::Result> { + // Guard against unbounded reads -- reject databases over 
500 MB + let file_size = std::fs::metadata(&self.db_path)?.len(); + if file_size > MAX_SESSION_SIZE { + anyhow::bail!( + "session database too large ({:.1} MB, limit {:.0} MB): {}", + file_size as f64 / (1024.0 * 1024.0), + MAX_SESSION_SIZE as f64 / (1024.0 * 1024.0), + self.db_path.display() + ); + } + + let conn = rusqlite::Connection::open_with_flags( + &self.db_path, + rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX, + )?; + conn.busy_timeout(std::time::Duration::from_millis(1000))?; + + let mut stmt = conn.prepare( + "SELECT id, role, content, tool_calls, tool_call_id, created_at \ + FROM messages \ + WHERE conversation_id = ?1 \ + ORDER BY created_at ASC \ + LIMIT 10000", + )?; + + let rows = stmt.query_map([&file.session_id], |row| { + Ok(MessageRow { + _id: row.get(0)?, + role: row.get(1)?, + content: row.get(2)?, + tool_calls: row.get(3)?, + tool_call_id: row.get(4)?, + created_at: row.get(5)?, + }) + })?; + + let messages: Vec = rows.filter_map(|r| r.ok()).collect(); + parse_opencode_messages(&messages, &file.session_id) + } +} + +// ============================================================================ +// Internal types +// ============================================================================ + +struct ConversationRow { + id: String, + _title: Option, + _created_at: Option, + updated_at: Option, +} + +struct MessageRow { + _id: String, + role: Option, + content: Option, + tool_calls: Option, + tool_call_id: Option, + created_at: Option, +} + +// ============================================================================ +// Message parsing (unit-testable without SQLite) +// ============================================================================ + +/// Parse OpenCode messages into tool invocations. +/// +/// Assistant messages with `tool_calls` JSON produce invocations. +/// Tool messages with `tool_call_id` provide correlated results. 
+fn parse_opencode_messages( + messages: &[MessageRow], + session_id: &str, +) -> anyhow::Result> { + let mut invocations = Vec::new(); + // Map from tool_call_id to index in invocations for result correlation + let mut pending: HashMap = HashMap::new(); + + for msg in messages { + let role = msg.role.as_deref().unwrap_or(""); + let timestamp = msg.created_at.as_deref().unwrap_or("").to_string(); + + match role { + "assistant" => { + // Parse tool_calls JSON array + if let Some(tool_calls_json) = &msg.tool_calls { + let tool_calls = parse_tool_calls_json(tool_calls_json); + for tc in tool_calls { + let input = map_opencode_tool(&tc.name, &tc.arguments); + let idx = invocations.len(); + invocations.push(ToolInvocation { + tool_name: tc.name.clone(), + input, + timestamp: timestamp.clone(), + session_id: session_id.to_string(), + agent: AgentKind::OpenCode, + result: None, + }); + if !tc.id.is_empty() { + pending.insert(tc.id, idx); + } + } + } + } + "tool" => { + // Correlate tool result by tool_call_id + if let Some(call_id) = &msg.tool_call_id { + if let Some(&idx) = pending.get(call_id.as_str()) { + let content = msg.content.as_deref().unwrap_or("").to_string(); + invocations[idx].result = Some(ToolResult { + content, + is_error: false, + }); + pending.remove(call_id.as_str()); + } + } + } + _ => {} // skip "user", "system", etc. + } + } + + Ok(invocations) +} + +/// A parsed tool call from the tool_calls JSON. +struct ParsedToolCall { + id: String, + name: String, + arguments: serde_json::Value, +} + +/// Parse the tool_calls JSON string into structured tool calls. +/// +/// Expected format: +/// ```json +/// [{"type": "function", "id": "call_123", "function": {"name": "bash", "arguments": "{\"command\":\"ls\"}"}}] +/// ``` +/// +/// Gracefully handles malformed JSON by returning an empty vec. 
+fn parse_tool_calls_json(raw: &str) -> Vec { + let arr: Vec = match serde_json::from_str(raw) { + Ok(v) => v, + Err(_) => return Vec::new(), + }; + + let mut calls = Vec::new(); + for item in &arr { + let func = match item.get("function") { + Some(f) => f, + None => continue, + }; + + let id = item + .get("id") + .and_then(|i| i.as_str()) + .unwrap_or("") + .to_string(); + + let name = func + .get("name") + .and_then(|n| n.as_str()) + .unwrap_or("") + .to_string(); + + // arguments is a JSON-encoded string that needs double-parsing + let arguments = func + .get("arguments") + .and_then(|a| { + if let Some(s) = a.as_str() { + serde_json::from_str(s).ok() + } else { + Some(a.clone()) + } + }) + .unwrap_or(serde_json::Value::Null); + + calls.push(ParsedToolCall { + id, + name, + arguments, + }); + } + + calls +} + +/// Map OpenCode tool names to normalized ToolInput. +/// +/// OpenCode uses lowercase tool names: "bash"/"shell", "read_file", "write_file", etc. +fn map_opencode_tool(name: &str, args: &serde_json::Value) -> ToolInput { + match name { + "bash" | "shell" | "execute" => { + let command = args + .get("command") + .and_then(|c| c.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Bash { command } + } + "read_file" | "read" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Read { file_path } + } + "write_file" | "write" | "create_file" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Write { file_path } + } + "edit_file" | "edit" | "patch" => { + let file_path = args + .get("file_path") + .or_else(|| args.get("path")) + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Edit { file_path } + } + "glob" | "list_files" => { + let pattern = args + .get("pattern") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + 
ToolInput::Glob { pattern } + } + "grep" | "search" => { + let pattern = args + .get("pattern") + .and_then(|p| p.as_str()) + .unwrap_or("") + .to_string(); + ToolInput::Grep { pattern } + } + _ => ToolInput::Other { + tool_name: name.to_string(), + raw: args.clone(), + }, + } +} + +/// Parse an ISO 8601 timestamp string to SystemTime. +/// +/// Handles both `2024-01-01T00:00:00Z` and `2024-01-01T00:00:00.000Z` formats. +/// Returns None for unparseable timestamps. +fn parse_iso_timestamp(s: &str) -> Option { + // Simple ISO 8601 parser: extract year, month, day, hour, minute, second + let s = s.trim(); + if s.len() < 19 { + return None; + } + + let year: u64 = s.get(0..4)?.parse().ok()?; + let month: u64 = s.get(5..7)?.parse().ok()?; + let day: u64 = s.get(8..10)?.parse().ok()?; + let hour: u64 = s.get(11..13)?.parse().ok()?; + let minute: u64 = s.get(14..16)?.parse().ok()?; + let second: u64 = s.get(17..19)?.parse().ok()?; + + // Approximate days from epoch (good enough for filtering) + let days_in_year = 365; + let leap_years = (year - 1970 + 1) / 4; // rough approximation + let month_days: [u64; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; + let mut total_days: u64 = (year - 1970) * days_in_year + leap_years; + for m in 0..(month.saturating_sub(1) as usize) { + total_days += month_days.get(m).copied().unwrap_or(30); + } + total_days += day.saturating_sub(1); + + let total_secs = total_days * 86400 + hour * 3600 + minute * 60 + second; + Some(std::time::UNIX_EPOCH + std::time::Duration::from_secs(total_secs)) +} + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + // ---- Tool call JSON parsing ---- + + #[test] + fn test_parse_messages_with_tool_calls() { + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("assistant".to_string()), + content: None, + 
tool_calls: Some( + r#"[{"type":"function","id":"call_1","function":{"name":"bash","arguments":"{\"command\":\"cargo test\"}"}}]"#.to_string(), + ), + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert_eq!(invocations[0].tool_name, "bash"); + assert!(matches!( + &invocations[0].input, + ToolInput::Bash { command } if command == "cargo test" + )); + assert_eq!(invocations[0].agent, AgentKind::OpenCode); + } + + #[test] + fn test_map_bash_tool() { + let args = serde_json::json!({"command": "ls -la"}); + let input = map_opencode_tool("bash", &args); + assert!(matches!(input, ToolInput::Bash { command } if command == "ls -la")); + + let input = map_opencode_tool("shell", &args); + assert!(matches!(input, ToolInput::Bash { command } if command == "ls -la")); + + let input = map_opencode_tool("execute", &args); + assert!(matches!(input, ToolInput::Bash { command } if command == "ls -la")); + } + + #[test] + fn test_map_read_file_tool() { + let args = serde_json::json!({"file_path": "/tmp/test.rs"}); + let input = map_opencode_tool("read_file", &args); + assert!(matches!( + input, + ToolInput::Read { file_path } if file_path == "/tmp/test.rs" + )); + + // Also supports "path" key + let args = serde_json::json!({"path": "/tmp/alt.rs"}); + let input = map_opencode_tool("read", &args); + assert!(matches!( + input, + ToolInput::Read { file_path } if file_path == "/tmp/alt.rs" + )); + } + + #[test] + fn test_correlate_tool_results_by_id() { + let messages = vec![ + MessageRow { + _id: "msg1".to_string(), + role: Some("assistant".to_string()), + content: None, + tool_calls: Some( + r#"[{"type":"function","id":"call_42","function":{"name":"read_file","arguments":"{\"file_path\":\"/tmp/test.rs\"}"}}]"#.to_string(), + ), + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }, + MessageRow { + _id: 
"msg2".to_string(), + role: Some("tool".to_string()), + content: Some("fn main() {}".to_string()), + tool_calls: None, + tool_call_id: Some("call_42".to_string()), + created_at: Some("2024-01-01T00:00:01Z".to_string()), + }, + ]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert_eq!(invocations.len(), 1); + assert!(invocations[0].result.is_some()); + let result = invocations[0].result.as_ref().unwrap(); + assert_eq!(result.content, "fn main() {}"); + assert!(!result.is_error); + } + + #[test] + fn test_empty_conversations() { + let messages: Vec = Vec::new(); + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_malformed_tool_calls_graceful() { + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("assistant".to_string()), + content: None, + tool_calls: Some("not valid json".to_string()), + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_walk_up_from_cwd() { + // walk_up_for_opencode starts from real cwd, which won't have .opencode/ + // Just verify it returns None when directory not found (doesn't panic) + let result = walk_up_for_opencode(); + // Could be Some or None depending on the system -- just ensure no crash + let _ = result; + } + + #[test] + fn test_env_override_path() { + // Temporarily set env var to a non-existent directory + std::env::set_var("SKIM_OPENCODE_DIR", "/tmp/nonexistent-opencode-test-dir"); + let provider = OpenCodeProvider::detect(); + // Should return None because directory doesn't exist (or has no DB) + assert!(provider.is_none()); + std::env::remove_var("SKIM_OPENCODE_DIR"); + } + + // ---- Additional tool mapping coverage ---- + + #[test] + fn test_map_write_file_tool() { + let args = serde_json::json!({"file_path": 
"/tmp/out.rs"}); + let input = map_opencode_tool("write_file", &args); + assert!(matches!( + input, + ToolInput::Write { file_path } if file_path == "/tmp/out.rs" + )); + + let input = map_opencode_tool("create_file", &args); + assert!(matches!( + input, + ToolInput::Write { file_path } if file_path == "/tmp/out.rs" + )); + } + + #[test] + fn test_map_edit_file_tool() { + let args = serde_json::json!({"file_path": "/tmp/edit.rs"}); + let input = map_opencode_tool("edit_file", &args); + assert!(matches!( + input, + ToolInput::Edit { file_path } if file_path == "/tmp/edit.rs" + )); + + let input = map_opencode_tool("patch", &args); + assert!(matches!( + input, + ToolInput::Edit { file_path } if file_path == "/tmp/edit.rs" + )); + } + + #[test] + fn test_map_glob_and_grep_tools() { + let args = serde_json::json!({"pattern": "**/*.rs"}); + let input = map_opencode_tool("glob", &args); + assert!(matches!(input, ToolInput::Glob { pattern } if pattern == "**/*.rs")); + + let input = map_opencode_tool("list_files", &args); + assert!(matches!(input, ToolInput::Glob { pattern } if pattern == "**/*.rs")); + + let args = serde_json::json!({"pattern": "fn main"}); + let input = map_opencode_tool("grep", &args); + assert!(matches!(input, ToolInput::Grep { pattern } if pattern == "fn main")); + + let input = map_opencode_tool("search", &args); + assert!(matches!(input, ToolInput::Grep { pattern } if pattern == "fn main")); + } + + #[test] + fn test_map_unknown_tool() { + let args = serde_json::json!({"foo": "bar"}); + let input = map_opencode_tool("custom_tool", &args); + assert!(matches!( + input, + ToolInput::Other { tool_name, .. 
} if tool_name == "custom_tool" + )); + } + + #[test] + fn test_parse_tool_calls_json_empty_array() { + let calls = parse_tool_calls_json("[]"); + assert!(calls.is_empty()); + } + + #[test] + fn test_parse_tool_calls_json_multiple() { + let json = r#"[ + {"type":"function","id":"call_1","function":{"name":"bash","arguments":"{\"command\":\"ls\"}"}}, + {"type":"function","id":"call_2","function":{"name":"read_file","arguments":"{\"file_path\":\"/tmp/a.rs\"}"}} + ]"#; + let calls = parse_tool_calls_json(json); + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].name, "bash"); + assert_eq!(calls[1].name, "read_file"); + } + + #[test] + fn test_parse_tool_calls_arguments_as_object() { + // Some implementations pass arguments as a JSON object instead of a string + let json = r#"[{"type":"function","id":"call_1","function":{"name":"bash","arguments":{"command":"cargo test"}}}]"#; + let calls = parse_tool_calls_json(json); + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].name, "bash"); + assert_eq!( + calls[0].arguments.get("command").and_then(|c| c.as_str()), + Some("cargo test") + ); + } + + #[test] + fn test_parse_iso_timestamp_valid() { + let ts = parse_iso_timestamp("2024-06-15T10:30:00Z"); + assert!(ts.is_some()); + assert!(ts.unwrap() > std::time::UNIX_EPOCH); + } + + #[test] + fn test_parse_iso_timestamp_with_millis() { + let ts = parse_iso_timestamp("2024-06-15T10:30:00.123Z"); + assert!(ts.is_some()); + } + + #[test] + fn test_parse_iso_timestamp_invalid() { + assert!(parse_iso_timestamp("").is_none()); + assert!(parse_iso_timestamp("not-a-date").is_none()); + assert!(parse_iso_timestamp("2024").is_none()); + } + + #[test] + fn test_multiple_tool_calls_in_one_message() { + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("assistant".to_string()), + content: None, + tool_calls: Some( + r#"[ + {"type":"function","id":"call_1","function":{"name":"bash","arguments":"{\"command\":\"ls\"}"}}, + 
{"type":"function","id":"call_2","function":{"name":"read_file","arguments":"{\"file_path\":\"/tmp/a.rs\"}"}} + ]"# + .to_string(), + ), + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert_eq!(invocations.len(), 2); + assert_eq!(invocations[0].tool_name, "bash"); + assert_eq!(invocations[1].tool_name, "read_file"); + } + + #[test] + fn test_user_messages_ignored() { + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("user".to_string()), + content: Some("Please help me with this code".to_string()), + tool_calls: None, + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_tool_result_without_matching_call() { + // Tool result for a call_id that was never seen should be silently ignored + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("tool".to_string()), + content: Some("some result".to_string()), + tool_calls: None, + tool_call_id: Some("call_nonexistent".to_string()), + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "sess1").unwrap(); + assert!(invocations.is_empty()); + } + + #[test] + fn test_session_id_propagated() { + let messages = vec![MessageRow { + _id: "msg1".to_string(), + role: Some("assistant".to_string()), + content: None, + tool_calls: Some( + r#"[{"type":"function","id":"call_1","function":{"name":"bash","arguments":"{\"command\":\"echo hi\"}"}}]"#.to_string(), + ), + tool_call_id: None, + created_at: Some("2024-01-01T00:00:00Z".to_string()), + }]; + + let invocations = parse_opencode_messages(&messages, "my-session-42").unwrap(); + assert_eq!(invocations[0].session_id, "my-session-42"); + } +} diff --git a/crates/rskim/src/cmd/session/types.rs 
b/crates/rskim/src/cmd/session/types.rs index a234a20..47067f9 100644 --- a/crates/rskim/src/cmd/session/types.rs +++ b/crates/rskim/src/cmd/session/types.rs @@ -1,13 +1,17 @@ //! Agent-agnostic session types (#61) -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::time::SystemTime; /// Which agent produced this session data. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum AgentKind { ClaudeCode, - // Future: CopilotCli, GeminiCli, CodexCli, Cursor, Cline, ... + CodexCli, + GeminiCli, + CopilotCli, + Cursor, + OpenCode, } impl AgentKind { @@ -15,6 +19,11 @@ impl AgentKind { pub(crate) fn from_str(s: &str) -> Option { match s { "claude-code" | "claude" => Some(AgentKind::ClaudeCode), + "codex" | "codex-cli" => Some(AgentKind::CodexCli), + "gemini" | "gemini-cli" => Some(AgentKind::GeminiCli), + "copilot" | "copilot-cli" => Some(AgentKind::CopilotCli), + "cursor" => Some(AgentKind::Cursor), + "opencode" | "open-code" => Some(AgentKind::OpenCode), _ => None, } } @@ -22,6 +31,121 @@ impl AgentKind { pub(crate) fn display_name(&self) -> &'static str { match self { AgentKind::ClaudeCode => "Claude Code", + AgentKind::CodexCli => "Codex CLI", + AgentKind::GeminiCli => "Gemini CLI", + AgentKind::CopilotCli => "Copilot CLI", + AgentKind::Cursor => "Cursor", + AgentKind::OpenCode => "OpenCode", + } + } + + pub(crate) fn cli_name(&self) -> &'static str { + match self { + AgentKind::ClaudeCode => "claude-code", + AgentKind::CodexCli => "codex", + AgentKind::GeminiCli => "gemini", + AgentKind::CopilotCli => "copilot", + AgentKind::Cursor => "cursor", + AgentKind::OpenCode => "opencode", + } + } + + /// Parse from a CLI flag value, returning a descriptive error for unknown agents. + /// + /// Shared by `discover` and `learn` subcommands to avoid duplicating the + /// error message with supported agent list. 
+ pub(crate) fn parse_cli_arg(s: &str) -> anyhow::Result { + Self::from_str(s).ok_or_else(|| { + let supported: Vec<&str> = Self::all_supported().iter().map(|a| a.cli_name()).collect(); + anyhow::anyhow!( + "unknown agent: '{}'\nSupported: {}", + s, + supported.join(", ") + ) + }) + } + + /// All supported agent kinds (for dynamic help text and iteration). + pub(crate) fn all_supported() -> &'static [AgentKind] { + &[ + AgentKind::ClaudeCode, + AgentKind::CodexCli, + AgentKind::GeminiCli, + AgentKind::CopilotCli, + AgentKind::Cursor, + AgentKind::OpenCode, + ] + } + + /// Returns the native rules directory/file path convention for this agent. + /// Returns None for agents that use single-file configs (user pastes content). + #[allow(dead_code)] // Used by learn.rs per-agent rules (phase 0.5) + pub(crate) fn rules_dir(&self) -> Option<&'static str> { + match self { + AgentKind::ClaudeCode => Some(".claude/rules"), + AgentKind::Cursor => Some(".cursor/rules"), + AgentKind::CopilotCli => Some(".github/instructions"), + // These agents use single-file configs -- user pastes content manually + AgentKind::CodexCli | AgentKind::GeminiCli | AgentKind::OpenCode => None, + } + } + + /// The dot-directory name (e.g., ".claude", ".gemini"). + /// Single source of truth for all agent directory names. + pub(crate) fn dot_dir_name(&self) -> &'static str { + match self { + AgentKind::ClaudeCode => ".claude", + AgentKind::Cursor => ".cursor", + AgentKind::GeminiCli => ".gemini", + AgentKind::CopilotCli => ".github", + AgentKind::CodexCli => ".codex", + AgentKind::OpenCode => ".opencode", + } + } + + /// Global config directory (home-relative). + /// Does NOT handle env var overrides — callers add those. + /// Note: Cursor uses runtime `is_dir()` for macOS vs Linux detection, + /// matching existing behavior in agents.rs and init/helpers.rs. 
+ pub(crate) fn config_dir(&self, home: &Path) -> PathBuf { + match self { + AgentKind::Cursor => { + let macos = home + .join("Library") + .join("Application Support") + .join("Cursor"); + if macos.is_dir() { + macos + } else { + home.join(".config").join("Cursor") + } + } + _ => home.join(self.dot_dir_name()), + } + } + + /// Project-level config directory (CWD-relative). + pub(crate) fn project_dir(&self) -> PathBuf { + PathBuf::from(self.dot_dir_name()) + } + + /// CWD-relative detection path for project-scoped agents. + /// Returns `Some` for agents detected via CWD (Copilot, OpenCode), + /// `None` for agents detected via home directory. + #[allow(dead_code)] // Used in tests; kept for future callers + pub(crate) fn detect_dir(&self) -> Option { + match self { + AgentKind::CopilotCli | AgentKind::OpenCode => Some(self.project_dir()), + _ => None, + } + } + + /// Return the rules filename for a given agent. + pub(crate) fn rules_filename(&self) -> &'static str { + match self { + AgentKind::Cursor => "skim-corrections.mdc", + AgentKind::CopilotCli => "skim-corrections.instructions.md", + _ => "skim-corrections.md", } } } @@ -157,3 +281,253 @@ pub(crate) fn parse_duration_ago(s: &str) -> anyhow::Result { Ok(SystemTime::now() - std::time::Duration::from_secs(secs)) } + +// ============================================================================ +// Unit tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + // ---- AgentKind::from_str ---- + + #[test] + fn test_agent_kind_from_str_claude_code() { + assert_eq!( + AgentKind::from_str("claude-code"), + Some(AgentKind::ClaudeCode) + ); + assert_eq!(AgentKind::from_str("claude"), Some(AgentKind::ClaudeCode)); + } + + #[test] + fn test_agent_kind_from_str_codex() { + assert_eq!(AgentKind::from_str("codex"), Some(AgentKind::CodexCli)); + assert_eq!(AgentKind::from_str("codex-cli"), Some(AgentKind::CodexCli)); + } + + #[test] + fn 
test_agent_kind_from_str_gemini() { + assert_eq!(AgentKind::from_str("gemini"), Some(AgentKind::GeminiCli)); + assert_eq!( + AgentKind::from_str("gemini-cli"), + Some(AgentKind::GeminiCli) + ); + } + + #[test] + fn test_agent_kind_from_str_copilot() { + assert_eq!(AgentKind::from_str("copilot"), Some(AgentKind::CopilotCli)); + assert_eq!( + AgentKind::from_str("copilot-cli"), + Some(AgentKind::CopilotCli) + ); + } + + #[test] + fn test_agent_kind_from_str_cursor() { + assert_eq!(AgentKind::from_str("cursor"), Some(AgentKind::Cursor)); + } + + #[test] + fn test_agent_kind_from_str_opencode() { + assert_eq!(AgentKind::from_str("opencode"), Some(AgentKind::OpenCode)); + assert_eq!(AgentKind::from_str("open-code"), Some(AgentKind::OpenCode)); + } + + #[test] + fn test_agent_kind_from_str_unknown() { + assert_eq!(AgentKind::from_str("unknown"), None); + assert_eq!(AgentKind::from_str(""), None); + } + + // ---- AgentKind::parse_cli_arg ---- + + #[test] + fn test_agent_kind_parse_cli_arg_valid() { + assert_eq!( + AgentKind::parse_cli_arg("claude-code").unwrap(), + AgentKind::ClaudeCode + ); + } + + #[test] + fn test_agent_kind_parse_cli_arg_unknown() { + let err = AgentKind::parse_cli_arg("nonexistent").unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("unknown agent"), "got: {msg}"); + assert!( + msg.contains("claude-code"), + "should list supported agents, got: {msg}" + ); + } + + // ---- AgentKind::display_name / cli_name ---- + + #[test] + fn test_agent_kind_display_name() { + assert_eq!(AgentKind::ClaudeCode.display_name(), "Claude Code"); + assert_eq!(AgentKind::CodexCli.display_name(), "Codex CLI"); + assert_eq!(AgentKind::GeminiCli.display_name(), "Gemini CLI"); + assert_eq!(AgentKind::CopilotCli.display_name(), "Copilot CLI"); + assert_eq!(AgentKind::Cursor.display_name(), "Cursor"); + assert_eq!(AgentKind::OpenCode.display_name(), "OpenCode"); + } + + #[test] + fn test_agent_kind_cli_name() { + assert_eq!(AgentKind::ClaudeCode.cli_name(), 
"claude-code"); + assert_eq!(AgentKind::CodexCli.cli_name(), "codex"); + assert_eq!(AgentKind::GeminiCli.cli_name(), "gemini"); + assert_eq!(AgentKind::CopilotCli.cli_name(), "copilot"); + assert_eq!(AgentKind::Cursor.cli_name(), "cursor"); + assert_eq!(AgentKind::OpenCode.cli_name(), "opencode"); + } + + // ---- AgentKind::all_supported ---- + + #[test] + fn test_agent_kind_all_supported() { + let all = AgentKind::all_supported(); + assert_eq!(all.len(), 6); + assert!(all.contains(&AgentKind::ClaudeCode)); + assert!(all.contains(&AgentKind::CodexCli)); + assert!(all.contains(&AgentKind::GeminiCli)); + assert!(all.contains(&AgentKind::CopilotCli)); + assert!(all.contains(&AgentKind::Cursor)); + assert!(all.contains(&AgentKind::OpenCode)); + } + + // ---- AgentKind::rules_dir ---- + + #[test] + fn test_agent_kind_rules_dir() { + assert_eq!(AgentKind::ClaudeCode.rules_dir(), Some(".claude/rules")); + assert_eq!(AgentKind::Cursor.rules_dir(), Some(".cursor/rules")); + assert_eq!( + AgentKind::CopilotCli.rules_dir(), + Some(".github/instructions") + ); + assert_eq!(AgentKind::CodexCli.rules_dir(), None); + assert_eq!(AgentKind::GeminiCli.rules_dir(), None); + assert_eq!(AgentKind::OpenCode.rules_dir(), None); + } + + // ---- Display impl ---- + + #[test] + fn test_agent_kind_display() { + assert_eq!(format!("{}", AgentKind::ClaudeCode), "Claude Code"); + assert_eq!(format!("{}", AgentKind::Cursor), "Cursor"); + } + + // ---- Round-trip: cli_name -> from_str ---- + + #[test] + fn test_agent_kind_roundtrip() { + for agent in AgentKind::all_supported() { + let parsed = AgentKind::from_str(agent.cli_name()); + assert_eq!(parsed, Some(*agent), "round-trip failed for {:?}", agent); + } + } + + // ---- AgentKind::dot_dir_name ---- + + #[test] + fn test_agent_kind_dot_dir_name() { + assert_eq!(AgentKind::ClaudeCode.dot_dir_name(), ".claude"); + assert_eq!(AgentKind::Cursor.dot_dir_name(), ".cursor"); + assert_eq!(AgentKind::GeminiCli.dot_dir_name(), ".gemini"); + 
assert_eq!(AgentKind::CopilotCli.dot_dir_name(), ".github"); + assert_eq!(AgentKind::CodexCli.dot_dir_name(), ".codex"); + assert_eq!(AgentKind::OpenCode.dot_dir_name(), ".opencode"); + } + + // ---- AgentKind::config_dir ---- + + #[test] + fn test_agent_kind_config_dir_simple_agents() { + let home = PathBuf::from("/fake/home"); + assert_eq!( + AgentKind::ClaudeCode.config_dir(&home), + PathBuf::from("/fake/home/.claude") + ); + assert_eq!( + AgentKind::CodexCli.config_dir(&home), + PathBuf::from("/fake/home/.codex") + ); + assert_eq!( + AgentKind::GeminiCli.config_dir(&home), + PathBuf::from("/fake/home/.gemini") + ); + assert_eq!( + AgentKind::CopilotCli.config_dir(&home), + PathBuf::from("/fake/home/.github") + ); + assert_eq!( + AgentKind::OpenCode.config_dir(&home), + PathBuf::from("/fake/home/.opencode") + ); + } + + #[test] + fn test_agent_kind_config_dir_cursor_linux_fallback() { + // With a fake home, macOS path won't exist → falls back to Linux path + let home = PathBuf::from("/fake/home"); + assert_eq!( + AgentKind::Cursor.config_dir(&home), + PathBuf::from("/fake/home/.config/Cursor") + ); + } + + // ---- AgentKind::project_dir ---- + + #[test] + fn test_agent_kind_project_dir() { + for agent in AgentKind::all_supported() { + assert_eq!( + agent.project_dir(), + PathBuf::from(agent.dot_dir_name()), + "project_dir mismatch for {:?}", + agent + ); + } + } + + // ---- AgentKind::detect_dir ---- + + #[test] + fn test_agent_kind_detect_dir() { + assert!(AgentKind::ClaudeCode.detect_dir().is_none()); + assert!(AgentKind::Cursor.detect_dir().is_none()); + assert!(AgentKind::GeminiCli.detect_dir().is_none()); + assert!(AgentKind::CodexCli.detect_dir().is_none()); + assert_eq!( + AgentKind::CopilotCli.detect_dir(), + Some(PathBuf::from(".github")) + ); + assert_eq!( + AgentKind::OpenCode.detect_dir(), + Some(PathBuf::from(".opencode")) + ); + } + + // ---- AgentKind::rules_filename ---- + + #[test] + fn test_agent_kind_rules_filename() { + assert_eq!( + 
AgentKind::ClaudeCode.rules_filename(), + "skim-corrections.md" + ); + assert_eq!(AgentKind::Cursor.rules_filename(), "skim-corrections.mdc"); + assert_eq!( + AgentKind::CopilotCli.rules_filename(), + "skim-corrections.instructions.md" + ); + assert_eq!(AgentKind::CodexCli.rules_filename(), "skim-corrections.md"); + assert_eq!(AgentKind::GeminiCli.rules_filename(), "skim-corrections.md"); + assert_eq!(AgentKind::OpenCode.rules_filename(), "skim-corrections.md"); + } +} diff --git a/crates/rskim/tests/cli_agents.rs b/crates/rskim/tests/cli_agents.rs new file mode 100644 index 0000000..5233faf --- /dev/null +++ b/crates/rskim/tests/cli_agents.rs @@ -0,0 +1,295 @@ +//! Integration tests for `skim agents` subcommand. + +use assert_cmd::Command; +use predicates::prelude::*; +use tempfile::TempDir; + +fn skim_cmd() -> Command { + Command::cargo_bin("skim").unwrap() +} + +#[test] +fn test_agents_help() { + skim_cmd() + .args(["agents", "--help"]) + .assert() + .success() + .stdout(predicate::str::contains("skim agents")) + .stdout(predicate::str::contains("--json")); +} + +#[test] +fn test_agents_short_help() { + skim_cmd() + .args(["agents", "-h"]) + .assert() + .success() + .stdout(predicate::str::contains("skim agents")); +} + +#[test] +fn test_agents_runs_without_crash() { + // Should succeed even with no agents detected + skim_cmd() + .args(["agents"]) + .assert() + .success() + .stdout(predicate::str::contains("Detected agents:")); +} + +#[test] +fn test_agents_json_output_valid_json() { + let output = skim_cmd() + .args(["agents", "--json"]) + .output() + .expect("failed to run skim agents --json"); + + assert!(output.status.success()); + + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = + serde_json::from_str(&stdout).expect("output should be valid JSON"); + + // Verify structure + assert!(parsed.get("agents").is_some(), "missing 'agents' key"); + let agents = parsed["agents"].as_array().expect("agents should be array"); + 
assert!(!agents.is_empty(), "agents array should not be empty"); + + // Each agent should have expected fields + for agent in agents { + assert!(agent.get("name").is_some(), "missing 'name' field"); + assert!(agent.get("cli_name").is_some(), "missing 'cli_name' field"); + assert!(agent.get("detected").is_some(), "missing 'detected' field"); + assert!(agent.get("hooks").is_some(), "missing 'hooks' field"); + } +} + +#[test] +fn test_agents_detects_claude_code_with_fixture() { + let dir = TempDir::new().unwrap(); + let project_dir = dir.path().join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + std::fs::write(project_dir.join("session.jsonl"), "{}").unwrap(); + std::fs::write(project_dir.join("other.jsonl"), "{}").unwrap(); + + skim_cmd() + .args(["agents"]) + .env("SKIM_PROJECTS_DIR", dir.path().to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("Claude Code")) + .stdout(predicate::str::contains("detected")) + .stdout(predicate::str::contains("2 files")); +} + +#[test] +fn test_agents_json_detects_claude_code_with_fixture() { + let dir = TempDir::new().unwrap(); + let project_dir = dir.path().join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + std::fs::write(project_dir.join("session.jsonl"), "{}").unwrap(); + + let output = skim_cmd() + .args(["agents", "--json"]) + .env("SKIM_PROJECTS_DIR", dir.path().to_str().unwrap()) + .output() + .expect("failed to run skim agents --json"); + + assert!(output.status.success()); + + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + + let agents = parsed["agents"].as_array().unwrap(); + let claude = agents + .iter() + .find(|a| a["cli_name"] == "claude-code") + .expect("should have claude-code agent"); + + assert_eq!(claude["detected"], true); + assert!(claude["sessions"].is_object(), "sessions should be present"); + assert!( + claude["sessions"]["detail"] + .as_str() 
+ .unwrap() + .contains("1 files"), + "expected 1 file in detail" + ); +} + +#[test] +fn test_agents_lists_all_supported() { + let output = skim_cmd() + .args(["agents", "--json"]) + .output() + .expect("failed to run skim agents --json"); + + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + let agents = parsed["agents"].as_array().unwrap(); + + // Should include all supported agents + let cli_names: Vec<&str> = agents + .iter() + .filter_map(|a| a["cli_name"].as_str()) + .collect(); + + assert!(cli_names.contains(&"claude-code"), "missing claude-code"); + assert!(cli_names.contains(&"cursor"), "missing cursor"); + assert!(cli_names.contains(&"codex"), "missing codex"); + assert!(cli_names.contains(&"gemini"), "missing gemini"); + assert!(cli_names.contains(&"copilot"), "missing copilot"); + assert!(cli_names.contains(&"opencode"), "missing opencode"); +} + +#[test] +fn test_agents_text_output_shows_all_names() { + skim_cmd() + .args(["agents"]) + .assert() + .success() + .stdout(predicate::str::contains("Claude Code")) + .stdout(predicate::str::contains("Cursor")) + .stdout(predicate::str::contains("Codex CLI")) + .stdout(predicate::str::contains("Gemini CLI")) + .stdout(predicate::str::contains("Copilot CLI")); +} + +// ============================================================================ +// Phase 6: Agent output accuracy and completeness +// ============================================================================ + +#[test] +fn test_agents_no_agents_all_not_detected() { + // Point all providers to nonexistent paths -- all should show "not detected" + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + + let output = skim_cmd() + .args(["agents", "--json"]) + .env("SKIM_PROJECTS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_COPILOT_DIR", 
nonexistent.to_str().unwrap()) + .env( + "SKIM_CURSOR_DB_PATH", + nonexistent.join("no-cursor.vscdb").to_str().unwrap(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + let agents = parsed["agents"].as_array().unwrap(); + + // At minimum, Claude Code and OpenCode use env overrides. + // Some agents (Cursor, Copilot, Gemini) detect from filesystem paths that + // don't have env overrides in the agents command. But with nonexistent paths + // set for those that do have overrides, we can at least verify the structure. + for agent in agents { + let name = agent["name"].as_str().unwrap(); + let detected = agent["detected"].as_bool().unwrap(); + // For agents whose detection depends on env vars we've overridden, + // they should not be detected + if name == "Claude Code" || name == "OpenCode" { + assert!( + !detected, + "{name} should not be detected with nonexistent path" + ); + } + } +} + +/// All supported agents -- single source of truth for count assertions. 
+const EXPECTED_AGENTS: &[&str] = &[ + "claude-code", + "cursor", + "codex", + "gemini", + "copilot", + "opencode", +]; + +#[test] +fn test_agents_json_has_expected_count() { + let output = skim_cmd().args(["agents", "--json"]).output().unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + let agents = parsed["agents"].as_array().unwrap(); + assert_eq!( + agents.len(), + EXPECTED_AGENTS.len(), + "Should have exactly {} agent entries (one per EXPECTED_AGENTS), got {}", + EXPECTED_AGENTS.len(), + agents.len() + ); +} + +#[test] +fn test_agents_claude_detected_with_session_count() { + let dir = TempDir::new().unwrap(); + let project_dir = dir.path().join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + std::fs::write(project_dir.join("session1.jsonl"), "{}").unwrap(); + std::fs::write(project_dir.join("session2.jsonl"), "{}").unwrap(); + std::fs::write(project_dir.join("session3.jsonl"), "{}").unwrap(); + + let output = skim_cmd() + .args(["agents", "--json"]) + .env("SKIM_PROJECTS_DIR", dir.path().to_str().unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + let agents = parsed["agents"].as_array().unwrap(); + let claude = agents + .iter() + .find(|a| a["cli_name"] == "claude-code") + .expect("should have claude-code agent"); + + assert_eq!(claude["detected"], true); + let detail = claude["sessions"]["detail"].as_str().unwrap(); + assert!( + detail.contains("3 files"), + "Should report 3 files, got: {detail}" + ); +} + +#[test] +fn test_agents_opencode_shows_typescript_plugin_note() { + // OpenCode should show "not supported (TypeScript plugin model)" for hooks + let output = skim_cmd().args(["agents", "--json"]).output().unwrap(); + + assert!(output.status.success()); + let 
stdout = String::from_utf8_lossy(&output.stdout); + let parsed: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + let agents = parsed["agents"].as_array().unwrap(); + let opencode = agents + .iter() + .find(|a| a["cli_name"] == "opencode") + .expect("should have opencode agent"); + + assert_eq!(opencode["hooks"]["status"], "not_supported"); + assert_eq!(opencode["hooks"]["note"], "TypeScript plugin model"); +} + +#[test] +fn test_agents_text_not_detected_without_fixtures() { + // Text mode with no agents detected should say "not detected" for each + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + + skim_cmd() + .args(["agents"]) + .env("SKIM_PROJECTS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("not detected")); +} diff --git a/crates/rskim/tests/cli_discover.rs b/crates/rskim/tests/cli_discover.rs index 32430ca..956a9a4 100644 --- a/crates/rskim/tests/cli_discover.rs +++ b/crates/rskim/tests/cli_discover.rs @@ -8,6 +8,22 @@ fn skim_cmd() -> Command { Command::cargo_bin("skim").unwrap() } +/// Build a skim command with all session providers neutralized (pointing to nonexistent paths). +/// Callers override specific providers as needed. 
+fn skim_cmd_neutralized(nonexistent: &std::path::Path) -> Command { + let mut cmd = skim_cmd(); + cmd.env("SKIM_PROJECTS_DIR", nonexistent.as_os_str()) + .env("SKIM_CODEX_SESSIONS_DIR", nonexistent.as_os_str()) + .env("SKIM_COPILOT_DIR", nonexistent.as_os_str()) + .env( + "SKIM_CURSOR_DB_PATH", + nonexistent.join("no-cursor.vscdb").as_os_str(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.as_os_str()) + .env("SKIM_OPENCODE_DIR", nonexistent.as_os_str()); + cmd +} + #[test] fn test_discover_help() { skim_cmd() @@ -72,6 +88,15 @@ fn test_discover_no_agent_dir() { skim_cmd() .args(["discover"]) .env("SKIM_PROJECTS_DIR", nonexistent.to_str().unwrap()) + // Neutralize all providers to ensure no agents are detected + .env("SKIM_CODEX_SESSIONS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_COPILOT_DIR", nonexistent.to_str().unwrap()) + .env( + "SKIM_CURSOR_DB_PATH", + dir.path().join("no-cursor.vscdb").to_str().unwrap(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()) .assert() .success() .stdout(predicate::str::contains("No AI agent sessions found")); @@ -153,6 +178,26 @@ fn test_discover_unknown_flag_error() { .stderr(predicate::str::contains("unknown flag")); } +#[test] +fn test_discover_since_missing_value() { + // --since with no value should fail with a descriptive error + skim_cmd() + .args(["discover", "--since"]) + .assert() + .failure() + .stderr(predicate::str::contains("--since requires a value")); +} + +#[test] +fn test_discover_agent_missing_value() { + // --agent with no value should fail with a descriptive error + skim_cmd() + .args(["discover", "--agent"]) + .assert() + .failure() + .stderr(predicate::str::contains("--agent requires a value")); +} + #[test] fn test_discover_json_has_structure() { let dir = TempDir::new().unwrap(); @@ -175,3 +220,202 @@ fn test_discover_json_has_structure() { assert!(json["code_reads"]["total"].is_number()); 
assert!(json["commands"]["total"].is_number()); } + +// ============================================================================ +// Phase 6: Cross-agent discover tests +// ============================================================================ + +/// Helper: create a Codex session fixture inside a YYYY/MM/DD/ structure. +fn create_codex_fixture(base_dir: &std::path::Path) { + let session_dir = base_dir.join("2026/03/25"); + std::fs::create_dir_all(&session_dir).unwrap(); + let fixture = include_str!("fixtures/codex/sample-session.jsonl"); + std::fs::write(session_dir.join("rollout-abc.jsonl"), fixture).unwrap(); +} + +#[test] +fn test_discover_cross_agent_claude_and_codex() { + // Set up fixtures for both Claude Code and Codex simultaneously + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + + // Claude Code fixture + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + let claude_fixture = include_str!("fixtures/cmd/session/claude_reads.jsonl"); + std::fs::write(project_dir.join("test-session.jsonl"), claude_fixture).unwrap(); + + // Codex fixture + let codex_dir = dir.path().join("codex-sessions"); + create_codex_fixture(&codex_dir); + + let output = skim_cmd_neutralized(&nonexistent) + .args(["discover", "--since", "7d", "--json"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let json: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + // Both agents contributed invocations + let total = json["total_invocations"].as_u64().unwrap(); + assert!( + total >= 2, + "Should have invocations from both agents, got {total}" + ); +} + +#[test] +fn test_discover_agent_filter_excludes_other_agents() { + // Set up both Claude Code and Codex fixtures, filter to 
claude-code only + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + let claude_fixture = include_str!("fixtures/cmd/session/claude_bash.jsonl"); + std::fs::write(project_dir.join("test-session.jsonl"), claude_fixture).unwrap(); + + let codex_dir = dir.path().join("codex-sessions"); + create_codex_fixture(&codex_dir); + + // Filter to claude-code only -- should NOT include Codex invocations + let output_filtered = skim_cmd_neutralized(&nonexistent) + .args([ + "discover", + "--agent", + "claude-code", + "--since", + "7d", + "--json", + ]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .output() + .unwrap(); + assert!(output_filtered.status.success()); + + // Now get unfiltered results for comparison + let output_all = skim_cmd_neutralized(&nonexistent) + .args(["discover", "--since", "7d", "--json"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .output() + .unwrap(); + assert!(output_all.status.success()); + + let json_filtered: serde_json::Value = serde_json::from_slice(&output_filtered.stdout).unwrap(); + let json_all: serde_json::Value = serde_json::from_slice(&output_all.stdout).unwrap(); + + let filtered_total = json_filtered["total_invocations"].as_u64().unwrap(); + let all_total = json_all["total_invocations"].as_u64().unwrap(); + + // Filtered total should be strictly less than unfiltered total (Codex excluded) + assert!( + filtered_total < all_total, + "Filtering by claude-code should exclude Codex invocations: filtered={filtered_total}, all={all_total}" + ); +} + +#[test] +fn test_discover_agent_filter_codex_only() { + // Set up both agents, filter to codex only + let dir = TempDir::new().unwrap(); + let 
nonexistent = dir.path().join("nonexistent"); + + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + let claude_fixture = include_str!("fixtures/cmd/session/claude_reads.jsonl"); + std::fs::write(project_dir.join("test-session.jsonl"), claude_fixture).unwrap(); + + let codex_dir = dir.path().join("codex-sessions"); + create_codex_fixture(&codex_dir); + + // Filter to codex only + let output = skim_cmd_neutralized(&nonexistent) + .args(["discover", "--agent", "codex", "--since", "7d", "--json"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .output() + .unwrap(); + assert!(output.status.success()); + let json: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + let total = json["total_invocations"].as_u64().unwrap(); + assert!( + total >= 1, + "Should have Codex invocations when filtering by codex, got {total}" + ); +} + +// ============================================================================ +// Phase 6: skim commands excluded from "missed" count +// ============================================================================ + +#[test] +fn test_discover_skim_commands_excluded_from_analysis() { + // Create a session with a mix of skim-prefixed and regular commands. + // Only regular commands should appear in the "commands" count. 
+ let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + + // Session with: 2 skim commands (should be excluded) + 1 regular command + let session = r#"{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t01","name":"Bash","input":{"command":"skim test cargo"}}]},"timestamp":"2024-01-01T00:00:00Z","sessionId":"sess1"} +{"type":"user","message":{"content":[{"tool_use_id":"t01","type":"tool_result","content":"ok"}]}} +{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t02","name":"Bash","input":{"command":"skim build clippy"}}]},"timestamp":"2024-01-01T00:01:00Z","sessionId":"sess1"} +{"type":"user","message":{"content":[{"tool_use_id":"t02","type":"tool_result","content":"ok"}]}} +{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t03","name":"Bash","input":{"command":"cargo test"}}]},"timestamp":"2024-01-01T00:02:00Z","sessionId":"sess1"} +{"type":"user","message":{"content":[{"tool_use_id":"t03","type":"tool_result","content":"test result: ok. 
5 passed"}]}} +"#; + std::fs::write(project_dir.join("mixed.jsonl"), session).unwrap(); + + let output = skim_cmd_neutralized(&nonexistent) + .args(["discover", "--since", "7d", "--json"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .output() + .unwrap(); + assert!(output.status.success()); + + let json: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + let commands_total = json["commands"]["total"].as_u64().unwrap(); + // Only "cargo test" should be counted, not the skim commands + assert_eq!( + commands_total, 1, + "skim commands should be excluded from command analysis, got {commands_total}" + ); +} + +#[test] +fn test_discover_only_skim_commands_shows_zero() { + // Session with only skim-prefixed commands should show 0 in commands count + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + + let session = r#"{"type":"assistant","message":{"content":[{"type":"tool_use","id":"t01","name":"Bash","input":{"command":"skim test cargo"}}]},"timestamp":"2024-01-01T00:00:00Z","sessionId":"sess1"} +{"type":"user","message":{"content":[{"tool_use_id":"t01","type":"tool_result","content":"ok"}]}} +"#; + std::fs::write(project_dir.join("skim-only.jsonl"), session).unwrap(); + + let output = skim_cmd_neutralized(&nonexistent) + .args(["discover", "--since", "7d", "--json"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .output() + .unwrap(); + assert!(output.status.success()); + + let json: serde_json::Value = serde_json::from_slice(&output.stdout).unwrap(); + let commands_total = json["commands"]["total"].as_u64().unwrap(); + assert_eq!( + commands_total, 0, + "Sessions with only skim commands should show 0 commands, got {commands_total}" + ); +} diff --git a/crates/rskim/tests/cli_e2e_rewrite.rs 
b/crates/rskim/tests/cli_e2e_rewrite.rs index 2fcbf25..3aa7ebc 100644 --- a/crates/rskim/tests/cli_e2e_rewrite.rs +++ b/crates/rskim/tests/cli_e2e_rewrite.rs @@ -306,3 +306,361 @@ fn test_rewrite_hook_compound_cargo_test_and_build() { .stdout(predicate::str::contains("skim test cargo")) .stdout(predicate::str::contains("skim build cargo")); } + +// ============================================================================ +// Phase 6: Hook protocol per-agent tests +// ============================================================================ + +#[test] +fn test_rewrite_hook_default_is_claude_code_behavior() { + // --hook without --agent should default to Claude Code behavior + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + // Should produce hookSpecificOutput (Claude Code behavior) + assert!( + stdout.contains("hookSpecificOutput"), + "Default hook mode should produce Claude Code hookSpecificOutput" + ); + assert!( + stdout.contains("skim test cargo"), + "Should rewrite cargo test" + ); +} + +#[test] +fn test_rewrite_hook_agent_claude_code_explicit() { + // --hook --agent claude-code should produce Claude Code hookSpecificOutput + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "claude-code"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["hookSpecificOutput"]["hookEventName"], "PreToolUse"); + assert!(json["hookSpecificOutput"]["updatedInput"]["command"] + .as_str() + 
.unwrap() + .contains("skim test cargo")); +} + +#[test] +fn test_rewrite_hook_agent_gemini_match() { + // Gemini uses same input format as Claude Code (tool_input.command) + // but responds with { "decision": "allow", "tool_input": { "command": ... } } + let input = serde_json::json!({ + "tool_name": "shell", + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "gemini"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap(); + assert_eq!( + json["decision"], "allow", + "Gemini response should have decision=allow" + ); + assert!( + json["tool_input"]["command"] + .as_str() + .unwrap() + .contains("skim test cargo"), + "Gemini response should contain rewritten command" + ); +} + +#[test] +fn test_rewrite_hook_agent_gemini_no_match_passthrough() { + let input = serde_json::json!({ + "tool_input": { + "command": "echo hello" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "gemini"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "Gemini no-match should passthrough (empty stdout), got: {stdout}" + ); +} + +#[test] +fn test_rewrite_hook_agent_copilot_match() { + // Copilot uses deny-with-suggestion response format + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "copilot"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = 
serde_json::from_str(stdout.trim()).unwrap(); + assert_eq!( + json["permissionDecision"], "deny", + "Copilot response should have permissionDecision=deny" + ); + assert!( + json["reason"].as_str().unwrap().contains("skim test cargo"), + "Copilot deny reason should contain rewritten command" + ); +} + +#[test] +fn test_rewrite_hook_agent_copilot_no_match_passthrough() { + let input = serde_json::json!({ + "tool_input": { + "command": "echo hello" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "copilot"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "Copilot no-match should passthrough (empty stdout), got: {stdout}" + ); +} + +#[test] +fn test_rewrite_hook_agent_cursor_match() { + // Cursor uses { "command": ... } at top level (not nested under tool_input) + // and responds with { "permission": "allow", "updated_input": { "command": ... 
} } + let input = serde_json::json!({ + "command": "cargo test" + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "cursor"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap(); + assert_eq!( + json["permission"], "allow", + "Cursor response should have permission=allow" + ); + assert!( + json["updated_input"]["command"] + .as_str() + .unwrap() + .contains("skim test cargo"), + "Cursor response should contain rewritten command" + ); +} + +#[test] +fn test_rewrite_hook_agent_cursor_no_match_passthrough() { + let input = serde_json::json!({ + "command": "echo hello" + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "cursor"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "Cursor no-match should passthrough (empty stdout), got: {stdout}" + ); +} + +#[test] +fn test_rewrite_hook_agent_codex_awareness_only() { + // Codex is AwarenessOnly — always empty stdout, exit 0 + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "codex"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "Codex (AwarenessOnly) should produce empty stdout, got: {stdout}" + ); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.is_empty(), + "Codex hook mode should produce zero stderr, got: {stderr}" + ); +} + +#[test] +fn test_rewrite_hook_agent_opencode_awareness_only() { + // OpenCode is 
AwarenessOnly — always empty stdout, exit 0 + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "opencode"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.trim().is_empty(), + "OpenCode (AwarenessOnly) should produce empty stdout, got: {stdout}" + ); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.is_empty(), + "OpenCode hook mode should produce zero stderr, got: {stderr}" + ); +} + +#[test] +fn test_rewrite_hook_agent_unknown_passthrough() { + // Unknown agent name (not in AgentKind::from_str) should default to + // Claude Code behavior since parse_agent_flag returns None. + let input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", "unknown-agent"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!( + output.status.success(), + "Unknown agent should not crash, exit 0" + ); + + // Unknown agent falls back to Claude Code -- "cargo test" is rewritable, + // so stdout should contain a Claude Code hook response. 
+ let stdout = String::from_utf8(output.stdout).unwrap(); + assert!( + stdout.contains("hookSpecificOutput"), + "Unknown agent should fall back to Claude Code response format, got: {stdout}" + ); +} + +#[test] +fn test_rewrite_hook_all_agents_zero_stderr() { + // Verify ALL hook responses have empty stderr + let agents_and_inputs: Vec<(&str, serde_json::Value)> = vec![ + ( + "claude-code", + serde_json::json!({"tool_input": {"command": "cargo test"}}), + ), + ("cursor", serde_json::json!({"command": "cargo test"})), + ( + "gemini", + serde_json::json!({"tool_input": {"command": "cargo test"}}), + ), + ( + "copilot", + serde_json::json!({"tool_input": {"command": "cargo test"}}), + ), + ( + "codex", + serde_json::json!({"tool_input": {"command": "cargo test"}}), + ), + ( + "opencode", + serde_json::json!({"tool_input": {"command": "cargo test"}}), + ), + ]; + + for (agent, input) in agents_and_inputs { + let output = skim_cmd() + .args(["rewrite", "--hook", "--agent", agent]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success(), "Agent {agent} should exit 0"); + let stderr = String::from_utf8(output.stderr.clone()).unwrap(); + assert!( + stderr.is_empty(), + "Agent {agent} hook mode must produce zero stderr, got: {stderr}" + ); + } +} + +// ============================================================================ +// Phase 6: Stderr cleanliness -- hook mode produces ZERO stderr +// ============================================================================ +// Per-agent zero-stderr coverage is handled by test_rewrite_hook_all_agents_zero_stderr. +// Only the passthrough (no --agent flag) case remains here as unique coverage. 
+ +#[test] +fn test_rewrite_hook_passthrough_zero_stderr() { + // Non-matching command with no agent flag + let input = serde_json::json!({ + "tool_input": { + "command": "ls -la" + } + }); + let output = skim_cmd() + .args(["rewrite", "--hook"]) + .write_stdin(serde_json::to_string(&input).unwrap()) + .output() + .unwrap(); + + assert!(output.status.success()); + let stderr = String::from_utf8(output.stderr).unwrap(); + assert!( + stderr.is_empty(), + "Passthrough hook mode should produce zero stderr, got: {stderr}" + ); +} diff --git a/crates/rskim/tests/cli_init.rs b/crates/rskim/tests/cli_init.rs index 4c94a96..619d939 100644 --- a/crates/rskim/tests/cli_init.rs +++ b/crates/rskim/tests/cli_init.rs @@ -759,23 +759,28 @@ fn test_hook_pipe_command_passthrough() { #[test] fn test_hook_version_mismatch_warning() { - // Set SKIM_HOOK_VERSION to a value that differs from the compiled version, - // triggering the version mismatch warning on stderr. + // Use a temp dir for cache to avoid stamp file pollution across tests. + let cache_dir = TempDir::new().unwrap(); + + // Set SKIM_HOOK_VERSION to a value that differs from the compiled version. + // The warning now goes to hook.log (NEVER stderr -- GRANITE #361 Bug 3). 
let output = Command::cargo_bin("skim") .unwrap() .args(["rewrite", "--hook"]) .env("SKIM_HOOK_VERSION", "0.0.1") + .env("SKIM_CACHE_DIR", cache_dir.path().as_os_str()) .write_stdin(hook_payload("cargo test")) .assert() .success(); + // CRITICAL: stderr MUST be empty in hook mode (zero-stderr invariant) let stderr = String::from_utf8(output.get_output().stderr.clone()).unwrap(); assert!( - stderr.contains("version mismatch"), - "Should warn about version mismatch on stderr, got: {stderr}" + stderr.is_empty(), + "Hook mode must have zero stderr even on version mismatch, got: {stderr}" ); - // The rewrite should still succeed despite the warning + // The rewrite should still succeed let stdout = String::from_utf8(output.get_output().stdout.clone()).unwrap(); let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); assert!( @@ -783,7 +788,19 @@ fn test_hook_version_mismatch_warning() { .as_str() .unwrap() .contains("skim test cargo"), - "Rewrite should succeed despite version mismatch warning" + "Rewrite should succeed despite version mismatch" + ); + + // Verify warning went to hook.log file instead + let hook_log = cache_dir.path().join("hook.log"); + assert!( + hook_log.exists(), + "Version mismatch warning should be written to hook.log" + ); + let log_content = fs::read_to_string(&hook_log).unwrap(); + assert!( + log_content.contains("version mismatch"), + "hook.log should contain version mismatch warning, got: {log_content}" ); } @@ -815,3 +832,29 @@ fn test_rewrite_hook_help() { .success() .stdout(predicate::str::contains("--hook")); } + +// ============================================================================ +// Phase 6: Multi-agent awareness in skim init +// ============================================================================ + +#[test] +fn test_init_help_mentions_agent_flag() { + // init --help should document the --agent flag for multi-agent support + Command::cargo_bin("skim") + .unwrap() + .args(["init", "--help"]) + .assert() 
+ .success() + .stdout(predicate::str::contains("--agent")); +} + +#[test] +fn test_rewrite_help_mentions_agent_flag() { + // rewrite --help should mention the --agent flag + Command::cargo_bin("skim") + .unwrap() + .args(["rewrite", "--help"]) + .assert() + .success() + .stdout(predicate::str::contains("--agent")); +} diff --git a/crates/rskim/tests/cli_integrity.rs b/crates/rskim/tests/cli_integrity.rs new file mode 100644 index 0000000..dd05290 --- /dev/null +++ b/crates/rskim/tests/cli_integrity.rs @@ -0,0 +1,308 @@ +//! Integration tests for hook integrity verification (#57). +//! +//! Tests the full lifecycle: install creates SHA-256 manifest, uninstall checks +//! integrity, tampered scripts require --force, and hook mode logs warnings +//! to file (NEVER stderr). + +use assert_cmd::Command; +use predicates::prelude::*; +use std::fs; +use std::os::unix::fs::PermissionsExt; +use tempfile::TempDir; + +// ============================================================================ +// Helper: build an isolated `skim init` command with CLAUDE_CONFIG_DIR override +// ============================================================================ + +fn skim_init_cmd(config_dir: &std::path::Path) -> Command { + let mut cmd = Command::cargo_bin("skim").unwrap(); + cmd.arg("init") + .env("CLAUDE_CONFIG_DIR", config_dir.as_os_str()); + cmd +} + +fn skim_rewrite_hook_cmd(config_dir: &std::path::Path) -> Command { + let mut cmd = Command::cargo_bin("skim").unwrap(); + cmd.args(["rewrite", "--hook"]) + .env("CLAUDE_CONFIG_DIR", config_dir.as_os_str()); + cmd +} + +// ============================================================================ +// Install creates SHA-256 file +// ============================================================================ + +#[test] +fn test_install_creates_sha256_file() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Verify the SHA-256 manifest was 
created + let manifest_path = config.join("hooks/skim-claude-code.sha256"); + assert!( + manifest_path.exists(), + "SHA-256 manifest should be created on install" + ); + + // Verify manifest format: sha256: skim-rewrite.sh + let content = fs::read_to_string(&manifest_path).unwrap(); + assert!( + content.starts_with("sha256:"), + "Manifest should start with sha256: prefix, got: {content}" + ); + assert!( + content.contains("skim-rewrite.sh"), + "Manifest should reference the script name, got: {content}" + ); + + // Verify hash is valid hex (64 chars for SHA-256) + let hash = content + .strip_prefix("sha256:") + .unwrap() + .split_whitespace() + .next() + .unwrap(); + assert_eq!(hash.len(), 64, "SHA-256 hash should be 64 hex chars"); + assert!( + hash.chars().all(|c| c.is_ascii_hexdigit()), + "Hash should be valid hex" + ); +} + +// ============================================================================ +// Upgrade recomputes hash +// ============================================================================ + +#[test] +fn test_upgrade_recomputes_hash() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + // First install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + let manifest_path = config.join("hooks/skim-claude-code.sha256"); + let _hash1 = fs::read_to_string(&manifest_path).unwrap(); + + // Modify the hook script version to simulate an upgrade scenario + let script_path = config.join("hooks/skim-rewrite.sh"); + let content = fs::read_to_string(&script_path).unwrap(); + let modified = content.replace("skim-hook v", "skim-hook v0.0.0-old-"); + fs::write(&script_path, &modified).unwrap(); + + // Re-run init (upgrade) -- should recompute hash + skim_init_cmd(config).args(["--yes"]).assert().success(); + + let hash2 = fs::read_to_string(&manifest_path).unwrap(); + // The hash should be different because the script content changed during upgrade + // (Actually, the install flow writes a NEW script with the current version, + 
// so the hash will match the freshly-written script) + assert!( + hash2.starts_with("sha256:"), + "After upgrade, manifest should still be valid" + ); +} + +// ============================================================================ +// Uninstall tampered requires --force +// ============================================================================ + +#[test] +fn test_uninstall_tampered_requires_force() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Tamper with the hook script + let script_path = config.join("hooks/skim-rewrite.sh"); + fs::write(&script_path, "#!/bin/bash\necho 'tampered'\n").unwrap(); + // Keep it executable + let perms = std::fs::Permissions::from_mode(0o755); + fs::set_permissions(&script_path, perms).unwrap(); + + // Uninstall WITHOUT --force should fail + skim_init_cmd(config) + .args(["--uninstall", "--yes"]) + .assert() + .failure() + .stderr(predicate::str::contains("modified since installation")) + .stderr(predicate::str::contains("--force")); +} + +#[test] +fn test_uninstall_with_force_bypasses_warning() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Tamper with the hook script + let script_path = config.join("hooks/skim-rewrite.sh"); + fs::write(&script_path, "#!/bin/bash\necho 'tampered'\n").unwrap(); + let perms = std::fs::Permissions::from_mode(0o755); + fs::set_permissions(&script_path, perms).unwrap(); + + // Uninstall WITH --force should succeed + skim_init_cmd(config) + .args(["--uninstall", "--yes", "--force"]) + .assert() + .success() + .stderr(predicate::str::contains("proceeding with --force")); + + // Script should be deleted + assert!( + !script_path.exists(), + "Hook script should be deleted after forced uninstall" + ); + + // Hash manifest should also be cleaned up + let manifest_path = 
config.join("hooks/skim-claude-code.sha256"); + assert!( + !manifest_path.exists(), + "Hash manifest should be cleaned up after uninstall" + ); +} + +// ============================================================================ +// Uninstall clean script proceeds normally +// ============================================================================ + +#[test] +fn test_uninstall_clean_script_proceeds() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Uninstall without tampering -- should succeed without --force + skim_init_cmd(config) + .args(["--uninstall", "--yes"]) + .assert() + .success(); + + // Everything should be cleaned up + let script_path = config.join("hooks/skim-rewrite.sh"); + assert!(!script_path.exists(), "Script should be deleted"); + let manifest_path = config.join("hooks/skim-claude-code.sha256"); + assert!(!manifest_path.exists(), "Manifest should be deleted"); +} + +// ============================================================================ +// Hook mode: tamper warning goes to log, NOT stderr +// ============================================================================ + +#[test] +fn test_hook_mode_tamper_warning_goes_to_log_not_stderr() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + let cache_dir = TempDir::new().unwrap(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Tamper with the hook script + let script_path = config.join("hooks/skim-rewrite.sh"); + fs::write(&script_path, "#!/bin/bash\necho 'tampered'\n").unwrap(); + + // Run hook mode with a simple command + let hook_input = serde_json::json!({ + "tool_input": { + "command": "cargo test" + } + }); + + // Override SKIM_CACHE_DIR so we can find the log file + skim_rewrite_hook_cmd(config) + .env("SKIM_CACHE_DIR", cache_dir.path().as_os_str()) + .write_stdin(hook_input.to_string()) + .assert() + .success() + // CRITICAL: 
stderr must NOT contain the tamper warning + .stderr(predicate::str::contains("tampered").not()); + + // The warning SHOULD appear in the log file. + // SKIM_CACHE_DIR points directly to the skim cache dir. + let log_path = cache_dir.path().join("hook.log"); + assert!( + log_path.exists(), + "Hook log file should exist at {}", + log_path.display() + ); + let log_content = fs::read_to_string(&log_path).unwrap(); + assert!( + log_content.contains("tampered"), + "Hook log should contain tamper warning, got: {log_content}" + ); +} + +// ============================================================================ +// Cleanup removes SHA-256 on uninstall +// ============================================================================ + +#[test] +fn test_cleanup_removes_sha256() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + let manifest_path = config.join("hooks/skim-claude-code.sha256"); + assert!( + manifest_path.exists(), + "Manifest should exist after install" + ); + + // Uninstall + skim_init_cmd(config) + .args(["--uninstall", "--yes"]) + .assert() + .success(); + + assert!( + !manifest_path.exists(), + "Manifest should be removed after uninstall" + ); +} + +// ============================================================================ +// Integrity suppresses version mismatch +// ============================================================================ + +#[test] +fn test_integrity_suppresses_version_mismatch() { + let dir = TempDir::new().unwrap(); + let config = dir.path(); + let cache_dir = TempDir::new().unwrap(); + + // Install + skim_init_cmd(config).args(["--yes"]).assert().success(); + + // Tamper with the hook script + let script_path = config.join("hooks/skim-rewrite.sh"); + fs::write(&script_path, "#!/bin/bash\necho 'tampered'\n").unwrap(); + + // Run hook mode with a MISMATCHED version env + let hook_input = serde_json::json!({ + "tool_input": { + 
"command": "cargo test" + } + }); + + // Set a mismatched hook version -- integrity warning should subsume it + skim_rewrite_hook_cmd(config) + .env("SKIM_HOOK_VERSION", "0.0.0-fake") + .env("SKIM_CACHE_DIR", cache_dir.path().as_os_str()) + .write_stdin(hook_input.to_string()) + .assert() + .success() + // CRITICAL: stderr must NOT contain version mismatch warning + // (integrity warning subsumes it) + .stderr(predicate::str::contains("version mismatch").not()); +} diff --git a/crates/rskim/tests/cli_learn.rs b/crates/rskim/tests/cli_learn.rs index cf94d06..664c5bb 100644 --- a/crates/rskim/tests/cli_learn.rs +++ b/crates/rskim/tests/cli_learn.rs @@ -90,7 +90,7 @@ fn test_learn_generate_writes_file() { .stdout(predicate::str::contains("Wrote corrections to:")); // Verify the file was created - let rules_file = work_dir.path().join(".claude/rules/cli-corrections.md"); + let rules_file = work_dir.path().join(".claude/rules/skim-corrections.md"); assert!(rules_file.exists(), "Rules file should be created"); let content = std::fs::read_to_string(&rules_file).unwrap(); assert!(content.contains("CLI Corrections"), "Should have header"); @@ -222,3 +222,239 @@ fn test_learn_no_bash_commands() { .assert() .success(); } + +// ============================================================================ +// Phase 6: Cross-agent learn tests -- per-agent rules file format +// ============================================================================ + +#[test] +fn test_learn_generate_claude_code_writes_md_file() { + let dir = TempDir::new().unwrap(); + let project_dir = dir.path().join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + + let fixture = include_str!("fixtures/cmd/session/session_errors.jsonl"); + std::fs::write(project_dir.join("error-session.jsonl"), fixture).unwrap(); + + let work_dir = TempDir::new().unwrap(); + + skim_cmd() + .args([ + "learn", + "--generate", + "--agent", + "claude-code", + "--since", + "7d", + ]) + 
.env("SKIM_PROJECTS_DIR", dir.path().to_str().unwrap()) + .current_dir(work_dir.path()) + .assert() + .success() + .stdout(predicate::str::contains("Wrote corrections to:")); + + let rules_file = work_dir.path().join(".claude/rules/skim-corrections.md"); + assert!( + rules_file.exists(), + "Claude Code rules file should be at .claude/rules/skim-corrections.md" + ); + let content = std::fs::read_to_string(&rules_file).unwrap(); + assert!(content.contains("CLI Corrections"), "Should have header"); + // Claude Code format: no frontmatter + assert!( + !content.starts_with("---"), + "Claude Code format should NOT have frontmatter" + ); +} + +#[test] +fn test_learn_generate_default_dry_run_preview() { + // Cursor rules format test: use Claude Code sessions (the error patterns + // are agent-agnostic) but request Cursor format output. + // + // Since --agent cursor filters providers to Cursor-only (which requires + // a SQLite DB we can't easily mock in integration tests), we test via + // dry-run with the Claude Code provider but default agent, then verify + // the unit-test-covered cursor format separately. + // + // The unit tests in learn.rs::tests::test_generate_rules_content_cursor_frontmatter + // already validate the Cursor frontmatter format. This integration test + // confirms the default (Claude Code) pipeline works end-to-end. 
+ let dir = TempDir::new().unwrap(); + let project_dir = dir.path().join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + + let fixture = include_str!("fixtures/cmd/session/session_errors.jsonl"); + std::fs::write(project_dir.join("error-session.jsonl"), fixture).unwrap(); + + // Verify the default --generate path works (Claude Code format) + let work_dir = TempDir::new().unwrap(); + skim_cmd() + .args(["learn", "--generate", "--dry-run", "--since", "7d"]) + .env("SKIM_PROJECTS_DIR", dir.path().to_str().unwrap()) + .current_dir(work_dir.path()) + .assert() + .success() + .stdout(predicate::str::contains("Would write to:")) + .stdout(predicate::str::contains("CLI Corrections")); +} + +#[test] +fn test_learn_generate_copilot_writes_instructions_md_with_frontmatter() { + // Create Copilot-format session fixture with error patterns + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + let copilot_dir = dir.path().join("copilot-sessions"); + std::fs::create_dir_all(&copilot_dir).unwrap(); + + // Copilot JSONL with an error-retry pair (carg test -> cargo test) + let copilot_session = concat!( + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "carg test"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-001", "resultType": "error", "content": "error: command not found: carg", "timestamp": "2024-06-15T10:01:05Z" }"#, + "\n", + r#"{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "cargo test"}, "id": "t-002", "timestamp": "2024-06-15T10:02:00Z" }"#, + "\n", + r#"{ "type": "tool_result", "toolUseId": "t-002", "resultType": "success", "content": "test result: ok. 
5 passed; 0 failed", "timestamp": "2024-06-15T10:02:05Z" }"#, + "\n" + ); + std::fs::write(copilot_dir.join("error-session.jsonl"), copilot_session).unwrap(); + + let work_dir = TempDir::new().unwrap(); + + let mut cmd = skim_cmd(); + cmd.args(["learn", "--generate", "--agent", "copilot", "--since", "7d"]) + .env("SKIM_COPILOT_DIR", copilot_dir.to_str().unwrap()) + .env("SKIM_PROJECTS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", nonexistent.to_str().unwrap()) + .env( + "SKIM_CURSOR_DB_PATH", + nonexistent.join("no-cursor.vscdb").to_str().unwrap(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()) + .current_dir(work_dir.path()); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Wrote corrections to:")); + + let rules_file = work_dir + .path() + .join(".github/instructions/skim-corrections.instructions.md"); + assert!( + rules_file.exists(), + "Copilot rules file should be at .github/instructions/skim-corrections.instructions.md" + ); + let content = std::fs::read_to_string(&rules_file).unwrap(); + assert!( + content.starts_with("---\napplyTo:"), + "Copilot format should have applyTo frontmatter, got: {}", + &content[..content.len().min(100)] + ); + assert!(content.contains("CLI Corrections"), "Should have header"); +} + +#[test] +fn test_learn_generate_codex_prints_to_stdout_no_file() { + // Create Codex-format session fixture with error patterns in YYYY/MM/DD structure + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + let codex_dir = dir.path().join("codex-sessions"); + let codex_session_dir = codex_dir.join("2026/03/25"); + std::fs::create_dir_all(&codex_session_dir).unwrap(); + + // Codex JSONL with an error-retry pair (carg test -> cargo test) + let codex_session = concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"carg 
test"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-err","tool_decision_id":"td-001"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"error: command not found: carg","is_error":true},"timestamp":"2026-03-01T10:00:01Z","session_id":"sess-err","tool_decision_id":"td-001"}"#, + "\n", + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"cargo test"},"timestamp":"2026-03-01T10:00:02Z","session_id":"sess-err","tool_decision_id":"td-002"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"test result: ok. 5 passed; 0 failed","is_error":false},"timestamp":"2026-03-01T10:00:03Z","session_id":"sess-err","tool_decision_id":"td-002"}"#, + "\n" + ); + std::fs::write( + codex_session_dir.join("rollout-errors.jsonl"), + codex_session, + ) + .unwrap(); + + let work_dir = TempDir::new().unwrap(); + + // Codex has no rules_dir() (returns None), so content is printed to stdout + let mut cmd = skim_cmd(); + cmd.args(["learn", "--generate", "--agent", "codex", "--since", "7d"]) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .env("SKIM_PROJECTS_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_COPILOT_DIR", nonexistent.to_str().unwrap()) + .env( + "SKIM_CURSOR_DB_PATH", + nonexistent.join("no-cursor.vscdb").to_str().unwrap(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()) + .current_dir(work_dir.path()); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Add the following to your")) + .stdout(predicate::str::contains("CLI Corrections")); + + // No file should have been written in the work dir + assert!( + !work_dir.path().join(".codex").exists(), + "Codex should NOT create a file, only print to stdout" + ); +} + +#[test] +fn test_learn_no_cross_agent_data_leakage() { + // Create Claude Code session with errors, but filter to codex. + // Codex has an empty session with no errors. 
+ // Result: no corrections found (codex sessions have no errors). + let dir = TempDir::new().unwrap(); + let nonexistent = dir.path().join("nonexistent"); + + // Claude Code session with errors + let claude_dir = dir.path().join("claude-projects"); + let project_dir = claude_dir.join("test-project"); + std::fs::create_dir_all(&project_dir).unwrap(); + let fixture = include_str!("fixtures/cmd/session/session_errors.jsonl"); + std::fs::write(project_dir.join("error-session.jsonl"), fixture).unwrap(); + + // Codex session dir with a clean (no-error) session + let codex_dir = dir.path().join("codex-sessions"); + let codex_session_dir = codex_dir.join("2026/03/25"); + std::fs::create_dir_all(&codex_session_dir).unwrap(); + std::fs::write( + codex_session_dir.join("rollout-clean.jsonl"), + concat!( + r#"{"type":"codex.tool_decision","tool":"bash","args":{"command":"ls"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-clean","tool_decision_id":"td-001"}"#, + "\n", + r#"{"type":"codex.tool_result","tool":"bash","result":{"content":"file1.rs","is_error":false},"timestamp":"2026-03-01T10:00:01Z","session_id":"sess-clean","tool_decision_id":"td-001"}"#, + "\n" + ), + ) + .unwrap(); + + // Filter to codex -- should NOT find Claude Code's error patterns + let mut cmd = skim_cmd(); + cmd.args(["learn", "--agent", "codex", "--since", "7d"]) + .env("SKIM_PROJECTS_DIR", claude_dir.to_str().unwrap()) + .env("SKIM_CODEX_SESSIONS_DIR", codex_dir.to_str().unwrap()) + .env("SKIM_COPILOT_DIR", nonexistent.to_str().unwrap()) + .env( + "SKIM_CURSOR_DB_PATH", + nonexistent.join("no-cursor.vscdb").to_str().unwrap(), + ) + .env("SKIM_GEMINI_DIR", nonexistent.to_str().unwrap()) + .env("SKIM_OPENCODE_DIR", nonexistent.to_str().unwrap()); + + cmd.assert().success().stdout( + predicate::str::contains("No CLI error patterns detected") + .or(predicate::str::contains("No Bash commands found")) + .or(predicate::str::contains("No tool invocations")), + ); +} diff --git 
a/crates/rskim/tests/cli_rewrite.rs b/crates/rskim/tests/cli_rewrite.rs index 3c57023..05ddcbd 100644 --- a/crates/rskim/tests/cli_rewrite.rs +++ b/crates/rskim/tests/cli_rewrite.rs @@ -243,6 +243,92 @@ fn test_rewrite_compound_bail_on_variable_expansion() { .failure(); } +// ============================================================================ +// Shell redirects (GRANITE #530) +// ============================================================================ + +#[test] +fn test_rewrite_redirect_stderr_to_stdout() { + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test 2>&1\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo 2>&1")); +} + +#[test] +fn test_rewrite_redirect_stderr_to_stdout_pipe() { + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test 2>&1 | head\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo 2>&1")) + .stdout(predicate::str::contains("|")) + .stdout(predicate::str::contains("head")); +} + +#[test] +fn test_rewrite_redirect_stderr_to_stdout_compound() { + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test 2>&1 && cargo build\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo 2>&1")) + .stdout(predicate::str::contains("&&")) + .stdout(predicate::str::contains("skim build cargo")); +} + +#[test] +fn test_rewrite_redirect_stderr_to_devnull() { + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test 2>/dev/null\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo 2>/dev/null")); +} + +#[test] +fn test_rewrite_redirect_stdout_to_file() { + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test > output.txt\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo > output.txt")); +} + +#[test] +fn test_rewrite_redirect_both_to_file() { + 
Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("cargo test &> output.txt\n") + .assert() + .success() + .stdout(predicate::str::contains("skim test cargo &> output.txt")); +} + +#[test] +fn test_rewrite_redirect_git_with_skip_flags() { + // Redirect must not trigger skip_if_flag_prefix (--porcelain, --stat, etc.) + Command::cargo_bin("skim") + .unwrap() + .arg("rewrite") + .write_stdin("git status 2>&1\n") + .assert() + .success() + .stdout(predicate::str::contains("skim git status 2>&1")); +} + // ============================================================================ // Git with skip flags // ============================================================================ diff --git a/crates/rskim/tests/fixtures/cmd/session/copilot/sample-session.jsonl b/crates/rskim/tests/fixtures/cmd/session/copilot/sample-session.jsonl new file mode 100644 index 0000000..013b0f3 --- /dev/null +++ b/crates/rskim/tests/fixtures/cmd/session/copilot/sample-session.jsonl @@ -0,0 +1,11 @@ +--- +model: gpt-4o +session_start: "2024-06-15T10:00:00Z" +project: "/home/user/myproject" +--- +{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "cargo test --all"}, "id": "t-001", "timestamp": "2024-06-15T10:01:00Z" } +{ "type": "tool_result", "toolUseId": "t-001", "resultType": "success", "content": "test result: ok. 
42 passed", "timestamp": "2024-06-15T10:01:05Z" } +{ "type": "tool_use", "toolName": "bash", "toolArgs": {"command": "git diff --stat"}, "id": "t-002", "timestamp": "2024-06-15T10:02:00Z" } +{ "type": "tool_result", "toolUseId": "t-002", "resultType": "error", "content": "fatal: not a git repository", "timestamp": "2024-06-15T10:02:01Z" } +{ "type": "tool_use", "toolName": "readFile", "toolArgs": {"path": "/home/user/myproject/src/main.rs"}, "id": "t-003", "timestamp": "2024-06-15T10:03:00Z" } +{ "type": "tool_result", "toolUseId": "t-003", "resultType": "success", "content": "fn main() { println!(\"hello\"); }", "timestamp": "2024-06-15T10:03:01Z" } diff --git a/crates/rskim/tests/fixtures/cmd/session/gemini_sample.jsonl b/crates/rskim/tests/fixtures/cmd/session/gemini_sample.jsonl new file mode 100644 index 0000000..31f281a --- /dev/null +++ b/crates/rskim/tests/fixtures/cmd/session/gemini_sample.jsonl @@ -0,0 +1,6 @@ +{"type":"tool_use","tool":"shell","args":{"command":"cargo test"},"id":"tu-001"} +{"type":"tool_result","tool_use_id":"tu-001","content":"test result: ok. 
5 passed; 0 failed","is_error":false} +{"type":"tool_use","tool":"read_file","args":{"file_path":"/src/main.rs"},"id":"tu-002"} +{"type":"tool_result","tool_use_id":"tu-002","content":"fn main() {\n println!(\"hello\");\n}","is_error":false} +{"type":"tool_use","tool":"shell","args":{"command":"git status"},"id":"tu-003"} +{"type":"tool_result","tool_use_id":"tu-003","content":"On branch main\nnothing to commit","is_error":false} diff --git a/crates/rskim/tests/fixtures/codex/sample-session.jsonl b/crates/rskim/tests/fixtures/codex/sample-session.jsonl new file mode 100644 index 0000000..f227690 --- /dev/null +++ b/crates/rskim/tests/fixtures/codex/sample-session.jsonl @@ -0,0 +1,6 @@ +{"type":"codex.tool_decision","tool":"bash","args":{"command":"cargo test"},"timestamp":"2026-03-01T10:00:00Z","session_id":"sess-abc","tool_decision_id":"td-001"} +{"type":"codex.tool_result","tool":"bash","result":{"content":"test result: ok. 5 passed; 0 failed","is_error":false},"timestamp":"2026-03-01T10:00:01Z","session_id":"sess-abc","tool_decision_id":"td-001"} +{"type":"codex.tool_decision","tool":"read","args":{"file_path":"/tmp/main.rs"},"timestamp":"2026-03-01T10:00:02Z","session_id":"sess-abc","tool_decision_id":"td-002"} +{"type":"codex.tool_result","tool":"read","result":{"content":"fn main() { println!(\"hello\"); }","is_error":false},"timestamp":"2026-03-01T10:00:03Z","session_id":"sess-abc","tool_decision_id":"td-002"} +{"type":"codex.tool_decision","tool":"bash","args":{"command":"git diff"},"timestamp":"2026-03-01T10:00:04Z","session_id":"sess-abc","tool_decision_id":"td-003"} +{"type":"codex.tool_result","tool":"bash","result":{"content":"error: not a git repository","is_error":true},"timestamp":"2026-03-01T10:00:05Z","session_id":"sess-abc","tool_decision_id":"td-003"}