diff --git a/.github/workflows/deploy-litellm.yml b/.github/workflows/deploy-litellm.yml
index 41f04e5d..f39d772e 100644
--- a/.github/workflows/deploy-litellm.yml
+++ b/.github/workflows/deploy-litellm.yml
@@ -150,7 +150,7 @@ jobs:
             echo ""
             echo "Testing health endpoint..."
             sleep 5
-            curl -f http://localhost:4000/health || echo "Health check failed - service may still be starting"
+            curl -f http://localhost:4000/health/readiness || echo "Health check failed - service may still be starting"  # -f makes HTTP errors trigger the fallback; readiness needs no auth and spends no tokens
 
       - name: Cleanup
         if: always()
diff --git a/conf/claude-local-marketplace/skills/agent-doc/SKILL.md b/conf/claude-local-marketplace/skills/agent-doc/SKILL.md
new file mode 100644
index 00000000..264b9cbe
--- /dev/null
+++ b/conf/claude-local-marketplace/skills/agent-doc/SKILL.md
@@ -0,0 +1,135 @@
+---
+name: agent-doc
+description: This skill should be used when managing, creating, or organizing agent documentation (CLAUDE.md, AGENTS.md). Triggered by phrases like [agent doc], [update claude.md], [add agent instruction], [organize agent docs], [create reference doc for agents]. Use this to keep main agent docs concise while linking to detailed reference documents.
+---
+
+# Agent Doc
+
+## Overview
+
+Manage agent documentation with a layered approach: keep main entry docs (CLAUDE.md, AGENTS.md) concise with references to detailed docs that agents load on-demand. This reduces context overhead while maintaining comprehensive guidance.
+
+## Core Principles
+
+1. **Concise Entry Docs** - Main docs contain only triggers and pointers, not full content
+2. **On-Demand Loading** - Detailed docs loaded only when relevant task is triggered
+3. **Single Source of Truth** - Each topic lives in one place only
+4. **Progressive Disclosure** - Surface-level in main doc, depth in references
+
+## Doc Structure
+
+```
+project/
+├── CLAUDE.md              # Main entry (concise, ~500 words max)
+├── AGENTS.md              # Agent-specific rules (optional)
+└── .claude/
+    └── docs/
+        ├── vue-components.md    # Detailed: Vue patterns
+        ├── api-guidelines.md    # Detailed: API conventions
+        ├── testing-rules.md     # Detailed: Test requirements
+        └── ...
+```
+
+## Writing Main Entry Docs
+
+### Format for CLAUDE.md/AGENTS.md
+
+Use conditional loading statements instead of inline content:
+
+```markdown
+# Project Instructions
+
+## Code Style
+- Follow existing patterns in codebase
+- If writing Vue components, read `.claude/docs/vue-components.md`
+- If writing API endpoints, read `.claude/docs/api-guidelines.md`
+
+## Testing
+- All features require tests
+- For testing guidelines, read `.claude/docs/testing-rules.md`
+```
+
+### What Belongs in Main Doc
+
+- Project name and brief purpose
+- Critical constraints (security, performance)
+- Conditional pointers to detailed docs
+- High-level workflow triggers
+
+### What Goes in Reference Docs
+
+- Detailed examples and code patterns
+- Step-by-step procedures
+- Schema definitions and API specs
+- Domain-specific knowledge
+
+## Creating Reference Docs
+
+### Naming Convention
+
+Use descriptive kebab-case names:
+- `vue-components.md` - Component patterns
+- `api-v2-migration.md` - Migration guide
+- `auth-flow.md` - Authentication details
+
+### Reference Doc Template
+
+```markdown
+# [Topic Name]
+
+## When to Use
+[Brief description of when agent should load this doc]
+
+## Guidelines
+[Main content - patterns, rules, examples]
+
+## Examples
+[Concrete code examples if applicable]
+
+## Common Mistakes
+[What to avoid]
+```
+
+## Workflow
+
+### Adding New Instructions
+
+1. Determine if instruction is universal or conditional
+2. Universal → Add brief line to main doc
+3. Conditional → Create/update reference doc in `.claude/docs/`
+4. Add pointer in main doc: "If doing X, read `.claude/docs/x.md`"
+
+### Auditing Existing Docs
+
+1. Check main doc line count (target: <100 lines)
+2. Identify inline content that should be extracted
+3. Group related instructions into reference docs
+4. Replace inline content with conditional pointers
+
+### Reorganizing Docs
+
+To refactor bloated main docs:
+
+1. Read current CLAUDE.md/AGENTS.md
+2. Categorize content by topic/trigger condition
+3. Create reference docs for each category
+4. Rewrite main doc with pointers only
+5. Verify no duplicate content exists
+
+## Best Practices
+
+- **One topic per reference doc** - Easier to maintain and load
+- **Use clear trigger phrases** - "If writing...", "When debugging...", "For API..."
+- **Keep references self-contained** - Should make sense without main doc context
+- **Version reference docs** - Include date or version if content evolves
+- **Test the flow** - Simulate agent loading to verify pointers work
+
+## Resources
+
+### references/reference-doc-template.md
+
+Template for creating new agent reference docs. Copy this template when creating a new reference doc in `.claude/docs/`.
+
+### references/writing-best-practices.md
+
+Detailed guidance on writing effective agent documentation, including structure, language, sizing, and common mistakes. Read when improving doc quality or reorganizing existing docs.
diff --git a/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md b/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md
new file mode 100644
index 00000000..ff5ffb1f
--- /dev/null
+++ b/conf/claude-local-marketplace/skills/agent-doc/references/reference-doc-template.md
@@ -0,0 +1,40 @@
+# [Topic Name]
+
+## When to Use
+
+[Brief 1-2 sentence description of when an agent should load this reference doc. Example: "Load this when implementing Vue components or refactoring existing Vue code."]
+
+## Guidelines
+
+[Main instructional content - include specific rules, patterns, conventions, or requirements]
+
+### Sub-section 1
+
+[Organize content into logical sections]
+
+### Sub-section 2
+
+[More detailed guidance]
+
+## Examples
+
+### Example 1: [Description]
+
+```[language]
+// Code example showing the pattern in practice
+```
+
+### Example 2: [Description]
+
+```[language]
+// Another concrete example
+```
+
+## Common Mistakes
+
+- **Mistake 1** - [What to avoid and why]
+- **Mistake 2** - [What to avoid and why]
+
+## Related
+
+- See also: [Links to related reference docs if applicable]
diff --git a/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md b/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md
new file mode 100644
index 00000000..d227d054
--- /dev/null
+++ b/conf/claude-local-marketplace/skills/agent-doc/references/writing-best-practices.md
@@ -0,0 +1,85 @@
+# Writing Effective Agent Reference Docs
+
+## When to Use
+
+Load this when creating new reference documentation or improving existing agent docs.
+
+## Principles
+
+### Scannable Structure
+
+- Use clear headings (H2, H3) for navigation
+- Lead sections with the most important info
+- Keep paragraphs short (3-5 sentences max)
+- Use lists for multiple items
+
+### Action-Oriented Language
+
+Use imperative form:
+- ✅ "Use kebab-case for file names"
+- ❌ "You should use kebab-case"
+- ❌ "Files should be named using kebab-case"
+
+### Concrete Over Abstract
+
+- ✅ "Name files like `user-profile.vue`, not `UserProfile.vue`"
+- ❌ "Follow consistent naming conventions"
+
+### Self-Contained Sections
+
+Each section should make sense independently. Agents may jump directly to a section via search.
+
+## Optimal Doc Sizes
+
+| Doc Type | Target Size | Max Size |
+|----------|-------------|----------|
+| Main entry (CLAUDE.md) | 50-100 lines | 150 lines |
+| Reference doc | 100-300 lines | 500 lines |
+| Quick reference | 20-50 lines | 100 lines |
+
+If a reference doc exceeds 500 lines, split into multiple focused docs.
+
+## Trigger Phrase Patterns
+
+Use consistent patterns in main docs to point to references:
+
+```markdown
+# Clear triggers
+- If writing [X], read `.claude/docs/[x].md`
+- When debugging [Y], see `.claude/docs/[y]-debugging.md`
+- For [Z] conventions, follow `.claude/docs/[z]-conventions.md`
+
+# Bad triggers (too vague)
+- See docs for more info
+- Check related documentation
+- Refer to guidelines
+```
+
+## Content Categories
+
+### Critical (Main Doc)
+
+- Security constraints
+- Hard rules (what to NEVER do)
+- Project-wide conventions
+
+### Conditional (Reference Docs)
+
+- Technology-specific patterns
+- Feature area guidelines
+- Domain knowledge
+- Workflow procedures
+
+### Ephemeral (Don't Document)
+
+- Temporary workarounds
+- One-time procedures
+- Obvious conventions
+
+## Common Mistakes
+
+- **Over-documenting** - Not every pattern needs a doc; trust agent reasoning
+- **Duplicating content** - Same info in main doc AND reference doc
+- **Vague triggers** - "See docs" doesn't tell agent when to load
+- **Stale content** - Docs that don't match actual codebase
+- **Buried critical info** - Important constraints hidden in long docs
diff --git a/conf/llm/docs/coding-rules.md b/conf/llm/docs/coding-rules.md
index 63b65ec8..e8b39db1 100644
--- a/conf/llm/docs/coding-rules.md
+++ b/conf/llm/docs/coding-rules.md
@@ -6,7 +6,6 @@
 
 ## Code of Conduct
 - Follow <context-aware-hierarchical-delegation> in the whole session.
-- **Clarity:** If intent is ambiguous, try 1: Use `recent-history` Skill to check last session context. 2. Ask clear clarifying questions (e.g., “Do you mean X or Y?”).
 - **Good Output Format:** Use enhanced Markdown formatting for clarity.
 - **Divergent thinking:** Extend your knowledge with web, kg tools, then use divergent thinking, use this for design, issue debugging.
 - **Facts check on Plan/Outdated context:** It is 2025 year now, facts check based on existing code patterns, official documentation(from exa/web), or trusted sources (e.g., Stack Overflow). Do not assume facts without verification, your knowledge is 1 year behind.
@@ -34,7 +33,7 @@
 - **Fail Fast:** Let bugs surface; do not mask errors with `try-catch` or optional chaining.
 - **Comment Intent:** Use `FIXME`, `TODO`, and `NOTE` to flag issues, explain logic, document changes, and note trade-offs.
 - **Comment as documentation:** Document any implement intent, decisions, critical findings in the code comment. Especially after a fix, document the reason behind the change.
-- **Design for Testability:** Apply DfT principles from the start—use dependency injection, prefer pure functions, avoid global state, and design for controllability and observability. Create seams for testing; isolate components to enable independent verification.
+- **Design for Testability:** Apply DFT principles from the start—use dependency injection, prefer pure functions, avoid global state, and design for controllability and observability. Create seams for testing; isolate components to enable independent verification.
 - **Avoid introduce implement complexity:** No backward compatibility layers, feature flags, or toggles unless explicitly requested.
 - **No external data based design:** Avoid designs relying on external data, for example, use external api data to determine program logic or control flow, it will broke when external data changes.
 - **Avoid outdated dependency:** Use the latest stable version of dependencies unless there is a specific reason to use an older version. This is important to avoid big refactor later.
diff --git a/nix/hm/ai/codex/default.nix b/nix/hm/ai/codex/default.nix
index 01deef17..cf64cae3 100644
--- a/nix/hm/ai/codex/default.nix
+++ b/nix/hm/ai/codex/default.nix
@@ -7,7 +7,7 @@
 let
   proxyConfig = import ../../../lib/proxy.nix { inherit lib pkgs; };
   mcp = import ../../../modules/ai/mcp.nix { inherit pkgs lib config; };
-  codex_home = "${config.xdg.configHome}/codex";
+  codex_home = "${config.home.homeDirectory}/.codex";
   codexMcpToml = builtins.readFile (
     (pkgs.formats.toml { }).generate "codex-mcp.toml" { mcp_servers = mcp.clients.codex; }
   );
@@ -29,17 +29,17 @@ in
     codex-with-proxy
   ];
 
-  xdg.configFile = {
-    "codex/instructions" = {
+  home.file = {
+    ".codex/instructions" = {
       source = ./instructions;
       recursive = true;
     };
-    "codex/skills" = {
+    ".codex/skills" = {
       source = ../../../../conf/claude-local-marketplace/skills;
       recursive = true;
     };
     # toml
-    "codex/config-generated.toml".text = ''
+    ".codex/config-generated.toml".text = ''
       model = "gpt-5.2-medium"
       model_provider = "packy"
       approval_policy = "untrusted"
@@ -61,7 +61,7 @@ in
       name = "packy"
       wire_api = "responses"
       base_url = "https://www.packyapi.com/v1"
-      env_key = "PACKYCODE_CODEX_API_KEY"
+      http_headers = { "Authorization" = "Bearer ${pkgs.nix-priv.keys.customProviders.packyOpenaiKey}" }
 
       [model_providers.litellm]
       name = "litellm"
@@ -141,13 +141,13 @@ in
       writable_roots = ["${config.home.homeDirectory}/workspace/work"]
 
       [shell_environment_policy]
-      inherit = "core"
+      inherit = "all"
       ignore_default_excludes = true
       # ["AWS_*"]
-      exclude = []
+      exclude = ["LITELLM_*", "OPENROUTER_*", "ZAI_*", "MOONSHOT_*"]
       # if provided, *only* vars matching these patterns are kept
-      include_only = []
-      set = { HTTP_PROXY = "${proxyConfig.proxies.http}", HTTPS_PROXY = "${proxyConfig.proxies.https}" }
+      # include_only = []
+      set = { COPILOT = "1", HTTP_PROXY = "${proxyConfig.proxies.http}", HTTPS_PROXY = "${proxyConfig.proxies.https}" } # env values must be TOML strings, hence "1" rather than 1
 
       ## MCP
       ${codexMcpToml}
diff --git a/nix/hm/litellm.nix b/nix/hm/litellm.nix
index dd4fa88d..5fe07ea7 100644
--- a/nix/hm/litellm.nix
+++ b/nix/hm/litellm.nix
@@ -113,7 +113,7 @@ in
 
     # Point Claude Code to LiteLLM proxy
     # ANTHROPIC_BASE_URL = "http://0.0.0.0:4000";
-    ANTHROPIC_AUTH_TOKEN = pkgs.nix-priv.keys.litellm.apiKey;
+    # ANTHROPIC_AUTH_TOKEN = pkgs.nix-priv.keys.litellm.apiKey;
 
     # Claude Code model selection - configure which models to use for different tiers
     # These map to the model names defined in the LiteLLM config above
diff --git a/nix/hm/litellm/bender-muffin.nix b/nix/hm/litellm/bender-muffin.nix
index cfb37a7b..e8951e75 100644
--- a/nix/hm/litellm/bender-muffin.nix
+++ b/nix/hm/litellm/bender-muffin.nix
@@ -9,10 +9,10 @@ let
 in
 [
   (providers.packyGemini.model {
-    model_name = "packy/gemini-3-flash";
+    model_name = modelName;
     litellm_params = {
       model = "anthropic/gemini-3-flash-preview";
-      rpm = 1;
+      rpm = 3;
     };
   })
   (providers.packyCc.model {
@@ -27,7 +27,7 @@ in
     litellm_params = {
       model = "anthropic/MiniMax-M2.1";
       max_tokens = 64000;
-      rpm = 5;
+      rpm = 2;
     };
   })
   (providers.zenmuxAnthropic.model {
diff --git a/nix/hm/litellm/config-generator.nix b/nix/hm/litellm/config-generator.nix
index be480f52..a70a1cd0 100644
--- a/nix/hm/litellm/config-generator.nix
+++ b/nix/hm/litellm/config-generator.nix
@@ -42,7 +42,6 @@ in
     litellm_settings = {
       REPEATED_STREAMING_CHUNK_LIMIT = 100;
       image_generation_model = "openrouter/x-ai/grok-4-fast";
-      master_key = "os.environ/LITELLM_MASTER_KEY";
       request_timeout = 600;
       num_retries = 2;
       allowed_fails = 3;
@@ -54,6 +53,8 @@ in
         { "copilot/claude-haiku-4.5" = [ "opencodeai/claude-haiku-4-5" ]; }
         { "copilot/claude-sonnet-4.5" = [ "opencodeai/claude-sonnet-4.5" ]; }
         { "copilot/gpt-5-mini" = [ "openrouter/minimax/minimax-m2" ]; }
+        { "frontier-muffin" = [ "packy/claude-sonnet-4-5" ]; }
+        { "bender-muffin" = [ "packy/claude-haiku-4-5" ]; }
       ];
       cache = false;
       cache_params = {
@@ -70,7 +71,9 @@ in
       enable_json_schema_validation = true;
     };
     general_settings = {
-      health_check_interval = 300;
+      master_key = pkgs.nix-priv.keys.litellm.apiKey; # NOTE(review): key is embedded in the generated config (see deploy README "Secrets in Config")
+      background_health_checks = false; # keep the proxy from probing models on its own; the deploy README warns that /health costs tokens
+      health_check_interval = 300000; # presumably seconds; likely only matters if background checks are re-enabled — TODO confirm
     };
     router_settings = {
       num_retries = 2;
diff --git a/nix/hm/litellm/deploy/README.md b/nix/hm/litellm/deploy/README.md
index 7364d006..6392e427 100644
--- a/nix/hm/litellm/deploy/README.md
+++ b/nix/hm/litellm/deploy/README.md
@@ -115,8 +115,14 @@ sudo journalctl -u litellm -n 100
 ## Testing
 
 ```bash
-# Health check
-curl http://localhost:4000/health
+# Health check (readiness - no auth required, FREE)
+curl http://localhost:4000/health/readiness
+
+# Health check (liveliness - no auth required, FREE)
+curl http://localhost:4000/health/liveliness
+
+# WARNING: DO NOT use the /health endpoint - it makes a real LLM call to every configured model, which costs tokens!
+# curl http://localhost:4000/health
 
 # List models (requires master key from config)
 curl http://localhost:4000/v1/models \
@@ -150,3 +156,4 @@ curl http://localhost:4000/v1/chat/completions \
 2. **Secrets in Config**: The `config.yaml` built via nix contains embedded secrets from `nix-priv`
 3. **Port**: LiteLLM listens on port 4000 by default
 4. **User Isolation**: Runs as dedicated `litellm` system user for security
+5. **SECURITY**: See `SECURITY.md` for critical information about protecting the `/health` endpoint from token drain attacks
diff --git a/nix/hm/litellm/deploy/deploy.sh b/nix/hm/litellm/deploy/deploy.sh
index 480915e2..efb3b0df 100644
--- a/nix/hm/litellm/deploy/deploy.sh
+++ b/nix/hm/litellm/deploy/deploy.sh
@@ -133,7 +133,10 @@ if systemctl is-active --quiet litellm; then
     echo "Service status: active"
     echo ""
     echo "View logs: sudo journalctl -u litellm -f"
-    echo "Check health: curl http://localhost:4000/health"
+    echo ""
+    echo "Health check endpoints (DO NOT use /health - it costs tokens!):"
+    echo "  - Readiness: curl http://localhost:4000/health/readiness"
+    echo "  - Liveliness: curl http://localhost:4000/health/liveliness"
 else
     echo ""
     echo "=== Deployment WARNING ==="
diff --git a/nix/hm/litellm/frontier-muffin.nix b/nix/hm/litellm/frontier-muffin.nix
index 6b1b8c04..36b81531 100644
--- a/nix/hm/litellm/frontier-muffin.nix
+++ b/nix/hm/litellm/frontier-muffin.nix
@@ -8,19 +8,13 @@ let
   modelName = "frontier-muffin";
 in
 [
-  (providers.packyGemini.model {
-    model_name = "packy/gemini-3-pro";
-    litellm_params = {
-      model = "anthropic/gemini-3-pro-preview";
-      rpm = 5;
-    };
-  })
-  (providers.packyOpenai.model {
-    model_name = modelName;
-    litellm_params = {
-      model = "anthropic/gpt-5.2-high";
-    };
-  })
+  # (providers.packyGemini.model {
+  #   model_name = modelName;
+  #   litellm_params = {
+  #     model = "anthropic/gemini-3-pro-preview";
+  #     rpm = 4;
+  #   };
+  # })
   (providers.packyCc.model {
     model_name = modelName;
     litellm_params = {
diff --git a/nix/hm/litellm/general-models.nix b/nix/hm/litellm/general-models.nix
index 2d1d5267..9dfd4584 100644
--- a/nix/hm/litellm/general-models.nix
+++ b/nix/hm/litellm/general-models.nix
@@ -75,6 +75,9 @@ let
       litellm_params = {
         model = "openrouter/*";
       };
+      model_info = {
+        disable_background_health_check = true;
+      };
     })
   ];
 
@@ -323,13 +326,13 @@ let
       };
     })
     (providers.packyCc.model {
-      model_name = "packy/claude-opus-4-5-20251101";
+      model_name = "packy/claude-opus-4-5";
       litellm_params = {
         model = "anthropic/claude-opus-4-5-20251101";
       };
     })
     (providers.packyCc.model {
-      model_name = "packy/claude-haiku-4-5-20251001";
+      model_name = "packy/claude-haiku-4-5";
       litellm_params = {
         model = "anthropic/claude-haiku-4-5-20251001";
       };