diff --git a/.github/workflows/deploy-litellm.yml b/.github/workflows/deploy-litellm.yml index 41f04e5d..f39d772e 100644 --- a/.github/workflows/deploy-litellm.yml +++ b/.github/workflows/deploy-litellm.yml @@ -150,7 +150,8 @@ jobs: echo "" echo "Testing health endpoint..." sleep 5 - curl -f http://localhost:4000/health || echo "Health check failed - service may still be starting" + # Probe the unauthenticated readiness endpoint (plain /health costs tokens); -f makes curl exit non-zero on HTTP errors so the fallback message can fire + curl -f http://localhost:4000/health/readiness || echo "Health check failed - service may still be starting" - name: Cleanup if: always() diff --git a/conf/claude-local-marketplace/skills/agent-doc/SKILL.md b/conf/claude-local-marketplace/skills/agent-doc/SKILL.md new file mode 100644 index 00000000..264b9cbe --- /dev/null +++ b/conf/claude-local-marketplace/skills/agent-doc/SKILL.md @@ -0,0 +1,135 @@ +--- +name: agent-doc +description: This skill should be used when managing, creating, or organizing agent documentation (CLAUDE.md, AGENTS.md). Triggered by phrases like [agent doc], [update claude.md], [add agent instruction], [organize agent docs], [create reference doc for agents]. Use this to keep main agent docs concise while linking to detailed reference documents. +--- + +# Agent Doc + +## Overview + +Manage agent documentation with a layered approach: keep main entry docs (CLAUDE.md, AGENTS.md) concise with references to detailed docs that agents load on-demand. This reduces context overhead while maintaining comprehensive guidance. + +## Core Principles + +1. **Concise Entry Docs** - Main docs contain only triggers and pointers, not full content +2. **On-Demand Loading** - Detailed docs loaded only when relevant task is triggered +3. **Single Source of Truth** - Each topic lives in one place only +4. 
**Progressive Disclosure** - Surface-level in main doc, depth in references + +## Doc Structure + +``` +project/ +├── CLAUDE.md # Main entry (concise, ~500 words max) +├── AGENTS.md # Agent-specific rules (optional) +└── .claude/ + └── docs/ + ├── vue-components.md # Detailed: Vue patterns + ├── api-guidelines.md # Detailed: API conventions + ├── testing-rules.md # Detailed: Test requirements + └── ... +``` + +## Writing Main Entry Docs + +### Format for CLAUDE.md/AGENTS.md + +Use conditional loading statements instead of inline content: + +```markdown +# Project Instructions + +## Code Style +- Follow existing patterns in codebase +- If writing Vue components, read `.claude/docs/vue-components.md` +- If writing API endpoints, read `.claude/docs/api-guidelines.md` + +## Testing +- All features require tests +- For testing guidelines, read `.claude/docs/testing-rules.md` +``` + +### What Belongs in Main Doc + +- Project name and brief purpose +- Critical constraints (security, performance) +- Conditional pointers to detailed docs +- High-level workflow triggers + +### What Goes in Reference Docs + +- Detailed examples and code patterns +- Step-by-step procedures +- Schema definitions and API specs +- Domain-specific knowledge + +## Creating Reference Docs + +### Naming Convention + +Use descriptive kebab-case names: +- `vue-components.md` - Component patterns +- `api-v2-migration.md` - Migration guide +- `auth-flow.md` - Authentication details + +### Reference Doc Template + +```markdown +# [Topic Name] + +## When to Use +[Brief description of when agent should load this doc] + +## Guidelines +[Main content - patterns, rules, examples] + +## Examples +[Concrete code examples if applicable] + +## Common Mistakes +[What to avoid] +``` + +## Workflow + +### Adding New Instructions + +1. Determine if instruction is universal or conditional +2. Universal → Add brief line to main doc +3. Conditional → Create/update reference doc in `.claude/docs/` +4. 
Add pointer in main doc: "If doing X, read `.claude/docs/x.md`" + +### Auditing Existing Docs + +1. Check main doc line count (target: <100 lines) +2. Identify inline content that should be extracted +3. Group related instructions into reference docs +4. Replace inline content with conditional pointers + +### Reorganizing Docs + +To refactor bloated main docs: + +1. Read current CLAUDE.md/AGENTS.md +2. Categorize content by topic/trigger condition +3. Create reference docs for each category +4. Rewrite main doc with pointers only +5. Verify no duplicate content exists + +## Best Practices + +- **One topic per reference doc** - Easier to maintain and load +- **Use clear trigger phrases** - "If writing...", "When debugging...", "For API..." +- **Keep references self-contained** - Should make sense without main doc context +- **Version reference docs** - Include date or version if content evolves +- **Test the flow** - Simulate agent loading to verify pointers work + +## Resources + +### references/reference-doc-template.md + +Template for creating new agent reference docs. Copy this template when creating a new reference doc in `.claude/docs/`. + +### references/writing-best-practices.md + +Detailed guidance on writing effective agent documentation, including structure, language, sizing, and common mistakes. Read when improving doc quality or reorganizing existing docs. diff --git a/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md b/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md new file mode 100644 index 00000000..ff5ffb1f --- /dev/null +++ b/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md @@ -0,0 +1,40 @@ +# [Topic Name] + +## When to Use + +[Brief 1-2 sentence description of when an agent should load this reference doc. 
Example: "Load this when implementing Vue components or refactoring existing Vue code."] + +## Guidelines + +[Main instructional content - include specific rules, patterns, conventions, or requirements] + +### Sub-section 1 + +[Organize content into logical sections] + +### Sub-section 2 + +[More detailed guidance] + +## Examples + +### Example 1: [Description] + +```[language] +// Code example showing the pattern in practice +``` + +### Example 2: [Description] + +```[language] +// Another concrete example +``` + +## Common Mistakes + +- **Mistake 1** - [What to avoid and why] +- **Mistake 2** - [What to avoid and why] + +## Related + +- See also: [Links to related reference docs if applicable] diff --git a/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md b/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md new file mode 100644 index 00000000..d227d054 --- /dev/null +++ b/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md @@ -0,0 +1,85 @@ +# Writing Effective Agent Reference Docs + +## When to Use + +Load this when creating new reference documentation or improving existing agent docs. + +## Principles + +### Scannable Structure + +- Use clear headings (H2, H3) for navigation +- Lead sections with the most important info +- Keep paragraphs short (3-5 sentences max) +- Use lists for multiple items + +### Action-Oriented Language + +Use imperative form: +- ✅ "Use kebab-case for file names" +- ❌ "You should use kebab-case" +- ❌ "Files should be named using kebab-case" + +### Concrete Over Abstract + +- ✅ "Name files like `user-profile.vue`, not `UserProfile.vue`" +- ❌ "Follow consistent naming conventions" + +### Self-Contained Sections + +Each section should make sense independently. Agents may jump directly to a section via search. 
+ +## Optimal Doc Sizes + +| Doc Type | Target Size | Max Size | +|----------|-------------|----------| +| Main entry (CLAUDE.md) | 50-100 lines | 150 lines | +| Reference doc | 100-300 lines | 500 lines | +| Quick reference | 20-50 lines | 100 lines | + +If a reference doc exceeds 500 lines, split into multiple focused docs. + +## Trigger Phrase Patterns + +Use consistent patterns in main docs to point to references: + +```markdown +# Clear triggers +- If writing [X], read `.claude/docs/[x].md` +- When debugging [Y], see `.claude/docs/[y]-debugging.md` +- For [Z] conventions, follow `.claude/docs/[z]-conventions.md` + +# Bad triggers (too vague) +- See docs for more info +- Check related documentation +- Refer to guidelines +``` + +## Content Categories + +### Critical (Main Doc) + +- Security constraints +- Breaking rules (what NEVER to do) +- Project-wide conventions + +### Conditional (Reference Docs) + +- Technology-specific patterns +- Feature area guidelines +- Domain knowledge +- Workflow procedures + +### Ephemeral (Don't Document) + +- Temporary workarounds +- One-time procedures +- Obvious conventions + +## Common Mistakes + +- **Over-documenting** - Not every pattern needs a doc; trust agent reasoning +- **Duplicating content** - Same info in main doc AND reference doc +- **Vague triggers** - "See docs" doesn't tell agent when to load +- **Stale content** - Docs that don't match actual codebase +- **Buried critical info** - Important constraints hidden in long docs diff --git a/conf/llm/docs/coding-rules.md b/conf/llm/docs/coding-rules.md index 63b65ec8..e8b39db1 100644 --- a/conf/llm/docs/coding-rules.md +++ b/conf/llm/docs/coding-rules.md @@ -6,7 +6,6 @@ ## Code of Conduct - Follow in the whole session. -- **Clarity:** If intent is ambiguous, try 1: Use `recent-history` Skill to check last session context. 2. Ask clear clarifying questions (e.g., “Do you mean X or Y?”). - **Good Output Format:** Use enhanced Markdown formatting for clarity. 
- **Divergent thinking:** Extend your knowledge with web, kg tools, then use divergent thinking, use this for design, issue debugging. - **Facts check on Plan/Outdated context:** It is 2025 year now, facts check based on existing code patterns, official documentation(from exa/web), or trusted sources (e.g., Stack Overflow). Do not assume facts without verification, your knowledge is 1 year behind. @@ -34,7 +33,7 @@ - **Fail Fast:** Let bugs surface; do not mask errors with `try-catch` or optional chaining. - **Comment Intent:** Use `FIXME`, `TODO`, and `NOTE` to flag issues, explain logic, document changes, and note trade-offs. - **Comment as documentation:** Document any implement intent, decisions, critical findings in the code comment. Especially after a fix, document the reason behind the change. -- **Design for Testability:** Apply DfT principles from the start—use dependency injection, prefer pure functions, avoid global state, and design for controllability and observability. Create seams for testing; isolate components to enable independent verification. +- **Design for Testability:** Apply DFT principles from the start—use dependency injection, prefer pure functions, avoid global state, and design for controllability and observability. Create seams for testing; isolate components to enable independent verification. - **Avoid introduce implement complexity:** No backward compatibility layers, feature flags, or toggles unless explicitly requested. - **No external data based design:** Avoid designs relying on external data, for example, use external api data to determine program logic or control flow, it will broke when external data changes. - **Avoid outdated dependency:** Use the latest stable version of dependencies unless there is a specific reason to use an older version. This is important to avoid big refactor later. 
diff --git a/nix/hm/ai/codex/default.nix b/nix/hm/ai/codex/default.nix index 01deef17..cf64cae3 100644 --- a/nix/hm/ai/codex/default.nix +++ b/nix/hm/ai/codex/default.nix @@ -7,7 +7,7 @@ let proxyConfig = import ../../../lib/proxy.nix { inherit lib pkgs; }; mcp = import ../../../modules/ai/mcp.nix { inherit pkgs lib config; }; - codex_home = "${config.xdg.configHome}/codex"; + codex_home = "${config.home.homeDirectory}/.codex"; codexMcpToml = builtins.readFile ( (pkgs.formats.toml { }).generate "codex-mcp.toml" { mcp_servers = mcp.clients.codex; } ); @@ -29,17 +29,17 @@ in codex-with-proxy ]; - xdg.configFile = { - "codex/instructions" = { + home.file = { + ".codex/instructions" = { source = ./instructions; recursive = true; }; - "codex/skills" = { + ".codex/skills" = { source = ../../../../conf/claude-local-marketplace/skills; recursive = true; }; # toml - "codex/config-generated.toml".text = '' + ".codex/config-generated.toml".text = '' model = "gpt-5.2-medium" model_provider = "packy" approval_policy = "untrusted" @@ -61,7 +61,7 @@ in name = "packy" wire_api = "responses" base_url = "https://www.packyapi.com/v1" - env_key = "PACKYCODE_CODEX_API_KEY" + http_headers = { "Authorization" = "Bearer ${pkgs.nix-priv.keys.customProviders.packyOpenaiKey}" } [model_providers.litellm] name = "litellm" @@ -141,13 +141,14 @@ in writable_roots = ["${config.home.homeDirectory}/workspace/work"] [shell_environment_policy] - inherit = "core" + inherit = "all" ignore_default_excludes = true # ["AWS_*"] - exclude = [] + exclude = ["LITELLM_*", "OPENROUTER_*", "ZAI_*", "MOONSHOT_*"] # if provided, *only* vars matching these patterns are kept - include_only = [] - set = { HTTP_PROXY = "${proxyConfig.proxies.http}", HTTPS_PROXY = "${proxyConfig.proxies.https}" } + # include_only = [] + # force-set values are environment variables, so every value must be a TOML string + set = { COPILOT = "1", HTTP_PROXY = "${proxyConfig.proxies.http}", HTTPS_PROXY = "${proxyConfig.proxies.https}" } ## MCP ${codexMcpToml} diff --git a/nix/hm/litellm.nix b/nix/hm/litellm.nix index 
dd4fa88d..5fe07ea7 100644 --- a/nix/hm/litellm.nix +++ b/nix/hm/litellm.nix @@ -113,7 +113,7 @@ in # Point Claude Code to LiteLLM proxy # ANTHROPIC_BASE_URL = "http://0.0.0.0:4000"; - ANTHROPIC_AUTH_TOKEN = pkgs.nix-priv.keys.litellm.apiKey; + # ANTHROPIC_AUTH_TOKEN = pkgs.nix-priv.keys.litellm.apiKey; # Claude Code model selection - configure which models to use for different tiers # These map to the model names defined in the LiteLLM config above diff --git a/nix/hm/litellm/bender-muffin.nix b/nix/hm/litellm/bender-muffin.nix index cfb37a7b..e8951e75 100644 --- a/nix/hm/litellm/bender-muffin.nix +++ b/nix/hm/litellm/bender-muffin.nix @@ -9,10 +9,10 @@ let in [ (providers.packyGemini.model { - model_name = "packy/gemini-3-flash"; + model_name = modelName; litellm_params = { model = "anthropic/gemini-3-flash-preview"; - rpm = 1; + rpm = 3; }; }) (providers.packyCc.model { @@ -27,7 +27,7 @@ in litellm_params = { model = "anthropic/MiniMax-M2.1"; max_tokens = 64000; - rpm = 5; + rpm = 2; }; }) (providers.zenmuxAnthropic.model { diff --git a/nix/hm/litellm/config-generator.nix b/nix/hm/litellm/config-generator.nix index be480f52..a70a1cd0 100644 --- a/nix/hm/litellm/config-generator.nix +++ b/nix/hm/litellm/config-generator.nix @@ -42,7 +42,6 @@ in litellm_settings = { REPEATED_STREAMING_CHUNK_LIMIT = 100; image_generation_model = "openrouter/x-ai/grok-4-fast"; - master_key = "os.environ/LITELLM_MASTER_KEY"; request_timeout = 600; num_retries = 2; allowed_fails = 3; @@ -54,6 +53,8 @@ in { "copilot/claude-haiku-4.5" = [ "opencodeai/claude-haiku-4-5" ]; } { "copilot/claude-sonnet-4.5" = [ "opencodeai/claude-sonnet-4.5" ]; } { "copilot/gpt-5-mini" = [ "openrouter/minimax/minimax-m2" ]; } + { "frontier-muffin" = [ "packy/claude-sonnet-4-5" ]; } + { "bender-muffin" = [ "packy/claude-haiku-4-5" ]; } ]; cache = false; cache_params = { @@ -70,7 +71,9 @@ in enable_json_schema_validation = true; }; general_settings = { - health_check_interval = 300; + master_key = 
pkgs.nix-priv.keys.litellm.apiKey; + background_health_checks = false; + health_check_interval = 300000; }; router_settings = { num_retries = 2; diff --git a/nix/hm/litellm/deploy/README.md b/nix/hm/litellm/deploy/README.md index 7364d006..6392e427 100644 --- a/nix/hm/litellm/deploy/README.md +++ b/nix/hm/litellm/deploy/README.md @@ -115,8 +115,14 @@ sudo journalctl -u litellm -n 100 ## Testing ```bash -# Health check -curl http://localhost:4000/health +# Health check (readiness - no auth required, FREE) +curl http://localhost:4000/health/readiness + +# Health check (liveliness - no auth required, FREE) +curl http://localhost:4000/health/liveliness + +# WARNING: DO NOT use /health endpoint - it costs tokens! +# curl http://localhost:4000/health # List models (requires master key from config) curl http://localhost:4000/v1/models \ @@ -150,3 +156,4 @@ curl http://localhost:4000/v1/chat/completions \ 2. **Secrets in Config**: The `config.yaml` built via nix contains embedded secrets from `nix-priv` 3. **Port**: LiteLLM listens on port 4000 by default 4. **User Isolation**: Runs as dedicated `litellm` system user for security +5. 
**SECURITY**: See `SECURITY.md` for critical information about protecting the `/health` endpoint from token drain attacks diff --git a/nix/hm/litellm/deploy/deploy.sh b/nix/hm/litellm/deploy/deploy.sh index 480915e2..efb3b0df 100644 --- a/nix/hm/litellm/deploy/deploy.sh +++ b/nix/hm/litellm/deploy/deploy.sh @@ -133,7 +133,10 @@ if systemctl is-active --quiet litellm; then echo "Service status: active" echo "" echo "View logs: sudo journalctl -u litellm -f" - echo "Check health: curl http://localhost:4000/health" + echo "" + echo "Health check endpoints (DO NOT use /health - it costs tokens!):" + echo " - Readiness: curl http://localhost:4000/health/readiness" + echo " - Liveliness: curl http://localhost:4000/health/liveliness" else echo "" echo "=== Deployment WARNING ===" diff --git a/nix/hm/litellm/frontier-muffin.nix b/nix/hm/litellm/frontier-muffin.nix index 6b1b8c04..36b81531 100644 --- a/nix/hm/litellm/frontier-muffin.nix +++ b/nix/hm/litellm/frontier-muffin.nix @@ -8,19 +8,13 @@ let modelName = "frontier-muffin"; in [ - (providers.packyGemini.model { - model_name = "packy/gemini-3-pro"; - litellm_params = { - model = "anthropic/gemini-3-pro-preview"; - rpm = 5; - }; - }) - (providers.packyOpenai.model { - model_name = modelName; - litellm_params = { - model = "anthropic/gpt-5.2-high"; - }; - }) + # (providers.packyGemini.model { + # model_name = modelName; + # litellm_params = { + # model = "anthropic/gemini-3-pro-preview"; + # rpm = 4; + # }; + # }) (providers.packyCc.model { model_name = modelName; litellm_params = { diff --git a/nix/hm/litellm/general-models.nix b/nix/hm/litellm/general-models.nix index 2d1d5267..9dfd4584 100644 --- a/nix/hm/litellm/general-models.nix +++ b/nix/hm/litellm/general-models.nix @@ -75,6 +75,9 @@ let litellm_params = { model = "openrouter/*"; }; + model_info = { + disable_background_health_check = true; + }; }) ]; @@ -323,13 +326,13 @@ let }; }) (providers.packyCc.model { - model_name = "packy/claude-opus-4-5-20251101"; + 
model_name = "packy/claude-opus-4-5"; litellm_params = { model = "anthropic/claude-opus-4-5-20251101"; }; }) (providers.packyCc.model { - model_name = "packy/claude-haiku-4-5-20251001"; + model_name = "packy/claude-haiku-4-5"; litellm_params = { model = "anthropic/claude-haiku-4-5-20251001"; };