Skip to content

Commit d5dfdca

Browse files
authored
Remove identify api (#8)
* Refactor RAG API: update the default model, remove image identification, and enhance the ask endpoint with tool-enabled search instead of a naive document lookup. * Prompt changes to provide better search results; increase max tokens to allow the model to provide longer responses. * Refactor the ask endpoint: update it to use the POST method with a JSON payload for improved API usage; modify test cases for the new question formats. Clean up unnecessary code and enhance logging in the RAG implementation.
1 parent 6f51e7b commit d5dfdca

File tree

10 files changed

+274
-355
lines changed

10 files changed

+274
-355
lines changed

.devcontainer/devcontainer.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
"extensions": [
2323
"ms-python.python",
2424
"zaaack.markdown-editor",
25-
"bierner.emojisense",
2625
"ms-python.debugpy"
2726
]
2827
}

.env

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
1-
DEFAULT_MODEL="qwen3:0.6b"
2-
DEFAULT_IMAGE_MODEL="qwen2.5vl:3b"
1+
DEFAULT_MODEL="qwen3:0.6b-q4_K_M"
32

43
OLLAMA_URL="http://ollama:11434"
54
DB_PATH="/data/enmemoryalpha_db"
6-
TEXT_COLLECTION_NAME="memoryalpha_text"
7-
IMAGE_COLLECTION_NAME="memoryalpha_images"
5+
TEXT_COLLECTION_NAME="memoryalpha_text"

.github/workflows/ci-build.yml

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,14 +40,15 @@ jobs:
4040
4141
- name: Test ask endpoint
4242
run: |
43-
# Test the synchronous ask endpoint with a simple query
44-
response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")
45-
43+
# Test the ask endpoint with a simple query
44+
response=$(curl -X POST "http://localhost:8000/memoryalpha/rag/ask" -H "Content-Type: application/json" -d '{
45+
"question": "What is the color of Vulcan blood?"
46+
}')
4647
# Check if response contains expected content
47-
if echo "$response" | grep -q "Enterprise"; then
48+
if echo "$response" | grep -q "green"; then
4849
echo "✅ Ask endpoint test passed"
4950
else
50-
echo "❌ Ask endpoint test failed - no relevant content found"
51+
echo "❌ Ask endpoint test failed, answer did not contain expected content"
5152
echo "Response: $response"
5253
exit 1
5354
fi

.github/workflows/pr-check.yml

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,14 +39,15 @@ jobs:
3939
4040
- name: Test ask endpoint
4141
run: |
42-
# Test the synchronous ask endpoint with a simple query
43-
response=$(curl -s -f "http://localhost:8000/memoryalpha/rag/ask?question=What%20is%20the%20Enterprise?&thinkingmode=DISABLED&max_tokens=100&top_k=3")
44-
42+
# Test the ask endpoint with a simple query
43+
response=$(curl -X POST "http://localhost:8000/memoryalpha/rag/ask" -H "Content-Type: application/json" -d '{
44+
"question": "What was the name of human who discovered warp drive?"
45+
}')
4546
# Check if response contains expected content
46-
if echo "$response" | grep -q "Enterprise"; then
47+
if echo "$response" | grep -q "Zefram Cochrane"; then
4748
echo "✅ Ask endpoint test passed"
4849
else
49-
echo "❌ Ask endpoint test failed - no relevant content found"
50+
echo "❌ Ask endpoint test failed, answer did not contain expected content"
5051
echo "Response: $response"
5152
exit 1
5253
fi

api/main.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from fastapi import FastAPI
44
from .memoryalpha.health import router as health_router
55
from .memoryalpha.ask import router as ask_router
6-
from .memoryalpha.identify import router as identify_router
76

87
# Configure logging
98
logging.basicConfig(level=logging.INFO)
@@ -21,5 +20,4 @@ async def lifespan(app: FastAPI):
2120
app = FastAPI(lifespan=lifespan)
2221

2322
app.include_router(health_router)
24-
app.include_router(ask_router)
25-
app.include_router(identify_router)
23+
app.include_router(ask_router)

api/memoryalpha/ask.py

Lines changed: 31 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,53 @@
1-
from fastapi import APIRouter, Query
1+
from fastapi import APIRouter, Query, Body
22
from fastapi.responses import JSONResponse
3+
from pydantic import BaseModel
4+
from typing import Optional
35

4-
from .rag import MemoryAlphaRAG, ThinkingMode
6+
from .rag import MemoryAlphaRAG
57

68
router = APIRouter()
79

810
# Singleton or global instance for demo; in production, manage lifecycle properly
911
rag_instance = MemoryAlphaRAG()
10-
ThinkingMode = ThinkingMode
12+
13+
class AskRequest(BaseModel):
14+
question: str
15+
max_tokens: Optional[int] = 2048
16+
top_k: Optional[int] = 10
17+
top_p: Optional[float] = 0.8
18+
temperature: Optional[float] = 0.3
19+
20+
@router.post("/memoryalpha/rag/ask")
21+
def ask_endpoint_post(request: AskRequest):
22+
"""
23+
Query the RAG pipeline and return the full response.
24+
Accepts POST requests with JSON payload for cleaner API usage.
25+
"""
26+
try:
27+
answer = rag_instance.ask(
28+
request.question,
29+
max_tokens=request.max_tokens,
30+
top_k=request.top_k,
31+
top_p=request.top_p,
32+
temperature=request.temperature
33+
)
34+
return JSONResponse(content={"response": answer})
35+
except Exception as e:
36+
return JSONResponse(status_code=500, content={"error": str(e)})
1137

1238
@router.get("/memoryalpha/rag/ask")
1339
def ask_endpoint(
1440
question: str = Query(..., description="The user question"),
15-
thinkingmode: str = Query("DISABLED", description="Thinking mode: DISABLED, QUIET, or VERBOSE"),
1641
max_tokens: int = Query(2048, description="Maximum tokens to generate"),
1742
top_k: int = Query(10, description="Number of documents to retrieve"),
1843
top_p: float = Query(0.8, description="Sampling parameter"),
1944
temperature: float = Query(0.3, description="Randomness/creativity of output")
2045
):
2146
"""
22-
Query the RAG pipeline and return the full response (including thinking if enabled).
47+
Query the RAG pipeline and return the full response.
48+
Now uses advanced tool-enabled RAG by default for better results.
2349
"""
2450
try:
25-
# Set the thinking mode for this request
26-
rag_instance.thinking_mode = ThinkingMode[thinkingmode.upper()]
2751
answer = rag_instance.ask(
2852
question,
2953
max_tokens=max_tokens,

api/memoryalpha/identify.py

Lines changed: 0 additions & 31 deletions
This file was deleted.

0 commit comments

Comments
 (0)