From b3d1de6bf447641a09fe9c3c3c9579dabfd4945b Mon Sep 17 00:00:00 2001
From: seanGSISG
Date: Thu, 1 Jan 2026 19:47:15 -0700
Subject: [PATCH] fix: implement two-stage readiness to prevent fresh install timeout

## Problem

Fresh plugin installations fail with "operation was aborted" timeout errors because hooks wait for full worker initialization including MCP connection, which can take 10-30+ seconds on first run (downloading dependencies, etc.). The 15-second hook timeout is insufficient for this initialization time.

## Root Cause

The Windows stability fix (PR #378) changed hooks to use `/api/readiness` which waits for full initialization including MCP. However, hooks only need database and SearchManager to function - they don't use MCP at all. This created a coupling where MCP initialization (the slow part) blocks hooks that don't need it.

## Solution: Two-Stage Readiness

Introduce staged initialization to separate what hooks need from full readiness:

1. **`coreReady` flag** - Set after database and SearchManager are initialized
2. **`/api/core-ready` endpoint** - Returns 200 when core services are ready
3. **`/api/health`** - Now includes `coreReady` field for visibility
4.
**`/api/readiness`** - Unchanged, still waits for full init (backward compat)

### Changes

**worker-service.ts:**
- Add `coreReady` flag alongside existing `mcpReady` and `initializationCompleteFlag`
- Add `/api/core-ready` endpoint that returns 200 when database+SearchManager ready
- Update `waitForHealth()` to use `/api/health` (server listening check only)
- Set `coreReady=true` after SearchManager initialization, before MCP connection

**worker-utils.ts:**
- Update `isWorkerHealthy()` to use `/api/core-ready` instead of `/api/readiness`
- Hooks now proceed as soon as core services are ready

**hooks.json:**
- Increase worker-service timeout from 15s to 45s as safety margin

## Benefits

- Fresh installs work without timeout errors
- Hooks proceed as soon as database+SearchManager are ready (~3s)
- MCP connection continues in background without blocking hooks
- MCP failures don't break hook functionality
- Backward compatible - `/api/readiness` unchanged for diagnostics/tooling

## Testing

1. Simulate fresh install: `rm -rf ~/.claude-mem`
2. Start new Claude session
3. Verify no "operation was aborted" errors
4.
Verify context injection works on first prompt --- plugin/hooks/hooks.json | 8 ++++---- src/services/worker-service.ts | 37 +++++++++++++++++++++++++++++----- src/shared/worker-utils.ts | 7 ++++--- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/plugin/hooks/hooks.json b/plugin/hooks/hooks.json index aba9df818..bfc5446b2 100644 --- a/plugin/hooks/hooks.json +++ b/plugin/hooks/hooks.json @@ -13,7 +13,7 @@ { "type": "command", "command": "bun \"${CLAUDE_PLUGIN_ROOT}/scripts/worker-service.cjs\" start", - "timeout": 15 + "timeout": 45 }, { "type": "command", @@ -34,7 +34,7 @@ { "type": "command", "command": "bun \"${CLAUDE_PLUGIN_ROOT}/scripts/worker-service.cjs\" start", - "timeout": 15 + "timeout": 45 }, { "type": "command", @@ -51,7 +51,7 @@ { "type": "command", "command": "bun \"${CLAUDE_PLUGIN_ROOT}/scripts/worker-service.cjs\" start", - "timeout": 15 + "timeout": 45 }, { "type": "command", @@ -67,7 +67,7 @@ { "type": "command", "command": "bun \"${CLAUDE_PLUGIN_ROOT}/scripts/worker-service.cjs\" start", - "timeout": 15 + "timeout": 45 }, { "type": "command", diff --git a/src/services/worker-service.ts b/src/services/worker-service.ts index 342d088d9..7acfe3bd3 100644 --- a/src/services/worker-service.ts +++ b/src/services/worker-service.ts @@ -155,7 +155,8 @@ async function waitForHealth(port: number, timeoutMs: number = 30000): Promise { + if (this.coreReady) { + res.status(200).json({ + status: 'ready', + mcpReady: this.mcpReady, + }); + } else { + res.status(503).json({ + status: 'initializing', + message: 'Core services still initializing, please retry', + }); + } + }); + + // Full readiness check endpoint - returns 503 until full initialization completes (including MCP) + // Used for diagnostics and anything that requires MCP to be connected this.app.get('/api/readiness', (_req, res) => { if (this.initializationCompleteFlag) { res.status(200).json({ @@ -687,6 +709,11 @@ export class WorkerService { this.searchRoutes.setupRoutes(this.app); // 
Setup search routes now that SearchManager is ready logger.info('WORKER', 'SearchManager initialized and search routes registered'); + // Core services are ready - hooks can now work (database + SearchManager) + // MCP connection happens next but hooks don't need it + this.coreReady = true; + logger.info('SYSTEM', 'Core services ready (hooks can now proceed)'); + // Connect to MCP server with timeout guard const mcpServerPath = path.join(__dirname, 'mcp-server.cjs'); const transport = new StdioClientTransport({ diff --git a/src/shared/worker-utils.ts b/src/shared/worker-utils.ts index c09b34468..7075ae728 100644 --- a/src/shared/worker-utils.ts +++ b/src/shared/worker-utils.ts @@ -57,13 +57,14 @@ export function clearPortCache(): void { } /** - * Check if worker is responsive and fully initialized by trying the readiness endpoint - * Changed from /health to /api/readiness to ensure MCP initialization is complete + * Check if worker core services are ready (database + SearchManager) + * Uses /api/core-ready - hooks don't need MCP, only core services + * Full readiness (including MCP) is checked via /api/readiness for diagnostics */ async function isWorkerHealthy(): Promise { const port = getWorkerPort(); // Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion) - const response = await fetch(`http://127.0.0.1:${port}/api/readiness`); + const response = await fetch(`http://127.0.0.1:${port}/api/core-ready`); return response.ok; }