fix(api/chat): wire chat endpoint through LLMService.chat() — closes silent canned-response regression (#7047) (#7101)

mrveiss · t · claude · web-flow · commit ff22ca612080 · 2026-05-06T22:22:22.000+03:00
Two #3185 LLMInterface-retirement misses shipped silently in the same helper, causing chat() to silently return a canned "I'm currently unable to generate a response" string for every user request: Bug 1 — broken module path (api/chat.py:106): from llm_service import LLMService # module never existed Canonical path is services.llm_service. The import was inside ``get_llm_service()`` so it deferred to first call rather than boot — silent until a chat request arrives, then ModuleNotFoundError. Bug 2 — feature-degradation guard around a stale method (api/chat.py:543): if hasattr(llm_service, "generate_response"): return await llm_service.generate_response(...) else: return {"content": "I'm currently unable to generate a response..."} LLMService doesn't expose generate_response (that was an LLMInterface method). The hasattr guard always took the else branch, so every request received the canned fallback regardless of whether LLMService was healthy. Major user-facing functional regression. Fix --- 1. Update lazy import to ``services.llm_service.LLMService``. 2. Replace the hasattr/else dance with a real ``llm_service.chat(...)`` call. Map LLMService's contract: - messages ← llm_context (already OpenAI-format) - conversation_id ← session_id (per-conversation overrides) - request_id ← request_id (passes through **kwargs for tracing) Read response.error / response.content per LLMResponse's Pydantic model (verified shapes against services/llm_service.py:99-112). 3. User-facing fallback strings are unchanged on error — internal error reasons are logged via ``logger.warning`` but never leaked to the response. Verification ------------ Added ``api/chat_generate_ai_response_test.py`` — 5 tests pinning the contract: - happy path: response.content surfaces as result["content"] - chat() args: messages/conversation_id/request_id passed through - LLM returns error: fallback string, error reason NOT leaked to user - chat() raises: fallback string, exception detail NOT leaked - **regression pin** for the original bug: hasattr(llm_service, "generate_response") path can never silently swallow output again — test fails if a future change reintroduces it - **regression pin** for the import path: assert source contains ``from services.llm_service import LLMService`` $ python3 -m pytest autobot-backend/api/chat_generate_ai_response_test.py -xvs ============================== 5 passed in 2.80s =============================== Per the feedback memory I just saved (verify_return_shape_in_method_migration): - LLMResponse.content: str ✓ verified - LLMResponse.error: Optional[str] ✓ verified - chat() return type: LLMResponse (inspected via inspect.signature) Closes #7047 (items 1 + 2). The remaining sites flagged in the issue body (async_chat_workflow.py, modern_ai_integration.py, nl_database_service.py) are tracked for separate per-site verification — their contexts differ enough that batching them risks the same return- shape miss this PR caught with tests. Co-authored-by: t <t@t> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/autobot-backend/api/chat.py b/autobot-backend/api/chat.py
@@ -103,7 +103,7 @@ def get_memory_interface(request: Request) -> Optional[Any]:
 
 def get_llm_service(request: Request) -> Any:
     """Get LLM service from app state, with lazy initialization"""
-    from llm_service import LLMService
+    from services.llm_service import LLMService
 
     from utils.lazy_singleton import lazy_init_singleton
 
@@ -540,15 +540,21 @@ async def _generate_ai_response(llm_service, llm_context: List[Dict], session_id
         AI response dict with content and role
     """
     try:
-        if hasattr(llm_service, "generate_response"):
-            return await llm_service.generate_response(
-                messages=llm_context, session_id=session_id, request_id=request_id
-            )
-        else:
+        # LLMService.chat() accepts OpenAI-format messages and uses
+        # conversation_id for per-conversation provider/model overrides.
+        # request_id flows through via **kwargs for tracing.
+        response = await llm_service.chat(
+            messages=llm_context,
+            conversation_id=session_id,
+            request_id=request_id,
+        )
+        if response.error:
+            logger.warning("LLM returned error for request %s: %s", request_id, response.error)
             return {
-                "content": "I'm currently unable to generate a response. Please try again.",
+                "content": "I encountered an error processing your message. Please try again.",
                 "role": "assistant",
             }
+        return {"content": response.content, "role": "assistant"}
     except Exception as e:
         logger.error("LLM generation failed: %s", e)
         return {
diff --git a/autobot-backend/api/chat_generate_ai_response_test.py b/autobot-backend/api/chat_generate_ai_response_test.py
@@ -0,0 +1,147 @@
+# AutoBot - AI-Powered Automation Platform
+# Copyright (c) 2025 mrveiss
+# Author: mrveiss
+"""Contract tests for ``api.chat._generate_ai_response`` (#7047).
+
+Two regressions shipped silently in the same helper after the #3185
+LLMInterface retirement:
+
+  1. ``api/chat.py:106`` imported from a non-existent ``llm_service`` module
+     (canonical path is ``services.llm_service``); function-scoped, so it
+     fired only on first call.
+
+  2. ``api/chat.py:543`` had ``hasattr(llm_service, "generate_response")``
+     guarding a method that LLMService never exposed. The else-branch ran
+     for every chat request — users got a canned "I'm currently unable
+     to generate a response" string instead of the model's reply.
+
+These tests pin the migrated shape so the same class of drift can't
+recur silently.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+
+
+@pytest.fixture
+def make_llm_response():
+    """Return a factory that builds a stub ``LLMResponse``-shaped object."""
+
+    class _StubResponse:
+        def __init__(self, *, content: str = "", error: str | None = None):
+            self.content = content
+            self.error = error
+
+    return _StubResponse
+
+
+@pytest.mark.asyncio
+async def test_generate_ai_response_returns_model_content_on_success(make_llm_response: Any) -> None:
+    """Happy path: LLMResponse.content surfaces in the result dict."""
+    from api.chat import _generate_ai_response
+
+    llm_service = AsyncMock()
+    llm_service.chat = AsyncMock(return_value=make_llm_response(content="Hello, world!", error=None))
+
+    result = await _generate_ai_response(
+        llm_service=llm_service,
+        llm_context=[{"role": "user", "content": "hi"}],
+        session_id="s-123",
+        request_id="r-abc",
+    )
+
+    assert result == {"content": "Hello, world!", "role": "assistant"}
+    # Pin the call shape — confirms migrated args reach LLMService.chat correctly.
+    llm_service.chat.assert_awaited_once_with(
+        messages=[{"role": "user", "content": "hi"}],
+        conversation_id="s-123",
+        request_id="r-abc",
+    )
+
+
+@pytest.mark.asyncio
+async def test_generate_ai_response_falls_back_when_llm_returns_error(make_llm_response: Any) -> None:
+    """LLMResponse.error truthy → user-friendly fallback message."""
+    from api.chat import _generate_ai_response
+
+    llm_service = AsyncMock()
+    llm_service.chat = AsyncMock(return_value=make_llm_response(content="", error="rate limit exceeded"))
+
+    result = await _generate_ai_response(
+        llm_service=llm_service,
+        llm_context=[{"role": "user", "content": "hi"}],
+        session_id="s-1",
+        request_id="r-1",
+    )
+
+    assert result["role"] == "assistant"
+    # Must NOT leak the underlying error message to the user — fallback string only.
+    assert "I encountered an error" in result["content"]
+    assert "rate limit" not in result["content"]
+
+
+@pytest.mark.asyncio
+async def test_generate_ai_response_falls_back_when_chat_raises(make_llm_response: Any) -> None:
+    """Network/runtime exception in chat() → user-friendly fallback."""
+    from api.chat import _generate_ai_response
+
+    llm_service = AsyncMock()
+    llm_service.chat = AsyncMock(side_effect=RuntimeError("boom"))
+
+    result = await _generate_ai_response(
+        llm_service=llm_service,
+        llm_context=[{"role": "user", "content": "hi"}],
+        session_id="s-1",
+        request_id="r-1",
+    )
+
+    assert result["role"] == "assistant"
+    assert "I encountered an error" in result["content"]
+    # Underlying exception detail must not surface in user-facing string.
+    assert "boom" not in result["content"]
+
+
+@pytest.mark.asyncio
+async def test_generate_ai_response_does_not_call_legacy_generate_response(make_llm_response: Any) -> None:
+    """Regression pin: post-#3185 the helper must call .chat(), never
+    .generate_response() (which was the deleted LLMInterface method).
+    Catches the original #7047 silent-fallback bug if reintroduced.
+    """
+    from api.chat import _generate_ai_response
+
+    llm_service = AsyncMock()
+    llm_service.chat = AsyncMock(return_value=make_llm_response(content="ok", error=None))
+    # If a future caller hits this attribute, the test fails — locks the migration.
+    llm_service.generate_response = AsyncMock(side_effect=AssertionError("legacy method"))
+
+    result = await _generate_ai_response(
+        llm_service=llm_service,
+        llm_context=[{"role": "user", "content": "hi"}],
+        session_id="s-1",
+        request_id="r-1",
+    )
+
+    assert result["content"] == "ok"
+    llm_service.generate_response.assert_not_awaited()
+    llm_service.chat.assert_awaited_once()
+
+
+def test_get_llm_service_imports_canonical_module_path() -> None:
+    """Regression pin: the lazy import inside ``get_llm_service`` must
+    resolve to ``services.llm_service.LLMService`` (the canonical post-#3185
+    location), not a non-existent top-level ``llm_service`` module.
+    """
+    import inspect
+
+    from api import chat
+
+    src = inspect.getsource(chat.get_llm_service)
+    assert "from services.llm_service import LLMService" in src, (
+        "get_llm_service must import from services.llm_service "
+        "(canonical post-#3185 path); the older 'from llm_service import' "
+        "raises ModuleNotFoundError at first call."
+    )