|
1 |
| -from fastapi import APIRouter, Query |
| 1 | +from fastapi import APIRouter, Query, Body |
2 | 2 | from fastapi.responses import JSONResponse
|
| 3 | +from pydantic import BaseModel |
| 4 | +from typing import Optional |
3 | 5 |
|
4 |
| -from .rag import MemoryAlphaRAG, ThinkingMode |
| 6 | +from .rag import MemoryAlphaRAG |
5 | 7 |
|
# Shared router that the application mounts; all RAG endpoints attach here.
router = APIRouter()

# Singleton or global instance for demo; in production, manage lifecycle properly
# (e.g. FastAPI lifespan/dependency injection) so the model isn't loaded at import time.
rag_instance = MemoryAlphaRAG()
|
10 |
| -ThinkingMode = ThinkingMode |
| 12 | + |
class AskRequest(BaseModel):
    """Request payload for the POST /memoryalpha/rag/ask endpoint.

    Defaults mirror the query-parameter defaults of the GET variant so both
    endpoints behave consistently when optional fields are omitted.
    """

    # The user question to answer via the RAG pipeline (required).
    question: str
    # Maximum number of tokens the model may generate.
    max_tokens: Optional[int] = 2048
    # Number of documents to retrieve for context.
    top_k: Optional[int] = 10
    # Nucleus-sampling parameter.
    top_p: Optional[float] = 0.8
    # Randomness/creativity of the output.
    temperature: Optional[float] = 0.3
| 19 | + |
@router.post("/memoryalpha/rag/ask")
def ask_endpoint_post(request: AskRequest):
    """
    Answer a question via the RAG pipeline (POST variant).

    Accepts POST requests with JSON payload for cleaner API usage.
    Returns {"response": <answer>} on success; if the pipeline raises,
    responds with HTTP 500 and {"error": <message>}.
    """
    try:
        # Forward the validated payload fields to the shared RAG instance.
        result = rag_instance.ask(
            request.question,
            max_tokens=request.max_tokens,
            top_k=request.top_k,
            top_p=request.top_p,
            temperature=request.temperature,
        )
    except Exception as exc:  # API boundary: surface the failure as a 500, don't crash
        return JSONResponse(status_code=500, content={"error": str(exc)})
    return JSONResponse(content={"response": result})
11 | 37 |
|
12 | 38 | @router.get("/memoryalpha/rag/ask")
|
13 | 39 | def ask_endpoint(
|
14 | 40 | question: str = Query(..., description="The user question"),
|
15 |
| - thinkingmode: str = Query("DISABLED", description="Thinking mode: DISABLED, QUIET, or VERBOSE"), |
16 | 41 | max_tokens: int = Query(2048, description="Maximum tokens to generate"),
|
17 | 42 | top_k: int = Query(10, description="Number of documents to retrieve"),
|
18 | 43 | top_p: float = Query(0.8, description="Sampling parameter"),
|
19 | 44 | temperature: float = Query(0.3, description="Randomness/creativity of output")
|
20 | 45 | ):
|
21 | 46 | """
|
22 |
| - Query the RAG pipeline and return the full response (including thinking if enabled). |
| 47 | + Query the RAG pipeline and return the full response. |
| 48 | + Now uses advanced tool-enabled RAG by default for better results. |
23 | 49 | """
|
24 | 50 | try:
|
25 |
| - # Set the thinking mode for this request |
26 |
| - rag_instance.thinking_mode = ThinkingMode[thinkingmode.upper()] |
27 | 51 | answer = rag_instance.ask(
|
28 | 52 | question,
|
29 | 53 | max_tokens=max_tokens,
|
|
0 commit comments