Skip to content

Commit 6e68b10

Browse files
committed
Make the service more robust
Signed-off-by: Shreyanand <[email protected]>
1 parent 1125def commit 6e68b10

6 files changed

+276
-99
lines changed

config.yaml

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
tools:
22
- name: "product_assistant"
33
description: "Answers questions related to CloudForge products: CloudForge Migrate, Secure, AI Optimizer, DevOps Accelerator, Kubernetes Orchestrator Pro"
4-
url: "<HOST>"
4+
url: "http://host/query/product"
55
config:
66
method: 'POST'
77
headers:
@@ -15,10 +15,10 @@ tools:
1515
json:
1616
- "response"
1717
examples:
18-
- "What is the CloudForge Migrate product?"
18+
- "What does the CloudForge Migrate product do?"
1919
- name: "HR_assistant"
2020
description: "Answers questions related to HR, employment, onboarding, culture, policies, workplace, IT, and security"
21-
url: "<HOST>"
21+
url: "http://host/query/hr"
2222
config:
2323
method: 'POST'
2424
headers:
@@ -32,10 +32,10 @@ tools:
3232
json:
3333
- "response"
3434
examples:
35-
- "What should I do before the start date?"
35+
- "What HR things should I do before the start date?"
3636
- name: "accounts_assistant"
3737
description: "Answers questions related to statements and revenue of customer accounts: FinNova Bank, MediCore Systems, TechWave Solutions"
38-
url: "<HOST>"
38+
url: "http://host/query/accounts"
3939
config:
4040
method: 'POST'
4141
headers:
@@ -49,4 +49,4 @@ tools:
4949
json:
5050
- "response"
5151
examples:
52-
- "What is the revenue from FinNova Bank?"
52+
- "Total Payments Received from FinNova Bank?"

rag/app.py

+98-9
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@
1212
MILVUS_PORT = "19530" # Default gRPC port
1313
connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
1414
model = SentenceTransformer("WhereIsAI/UAE-Large-V1")
15-
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "product_details")
16-
17-
collection_name = COLLECTION_NAME
18-
collection = Collection(name=collection_name)
19-
20-
# Ensure collection is loaded into memory
21-
collection.load()
2215

2316
# Define request and response models
2417
class QueryRequest(BaseModel):
@@ -30,7 +23,53 @@ class QueryResponse(BaseModel):
3023
text_chunk: str
3124
score: float
3225

33-
@app.post("/query", response_model=list[QueryResponse])
26+
@app.post("/query/product", response_model=list[QueryResponse])
27+
def query_milvus_api(request: QueryRequest):
28+
"""
29+
Query the Milvus index and return the top K matches.
30+
31+
Parameters:
32+
- query_text (str): The query string to search for.
33+
- top_k (int): The number of top matches to return (default is 3).
34+
35+
Returns:
36+
- List of top matches as JSON.
37+
"""
38+
COLLECTION_NAME = "product_details"
39+
40+
collection_name = COLLECTION_NAME
41+
collection = Collection(name=collection_name)
42+
43+
# Ensure collection is loaded into memory
44+
collection.load()
45+
46+
# Vectorize the query text
47+
query_embedding = model.encode([request.query_text])[0]
48+
49+
# Define search parameters
50+
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
51+
52+
# Perform search
53+
try:
54+
results = collection.search(
55+
data=[query_embedding],
56+
anns_field="embedding",
57+
param=search_params,
58+
limit=request.top_k,
59+
output_fields=["text_chunk"]
60+
)
61+
except Exception as e:
62+
raise HTTPException(status_code=500, detail=f"Search failed: {e}")
63+
64+
# Process results
65+
top_matches = [
66+
QueryResponse(id=result.id, text_chunk=result.entity.get("text_chunk"), score=result.distance)
67+
for result in results[0] # results[0] because search returns a list of lists
68+
]
69+
return top_matches
70+
71+
72+
@app.post("/query/hr", response_model=list[QueryResponse])
3473
def query_milvus_api(request: QueryRequest):
3574
"""
3675
Query the Milvus index and return the top K matches.
@@ -42,7 +81,57 @@ def query_milvus_api(request: QueryRequest):
4281
Returns:
4382
- List of top matches as JSON.
4483
"""
45-
# Check if the collection is loaded (Milvus requirement)
84+
COLLECTION_NAME = "HR_policies"
85+
86+
collection_name = COLLECTION_NAME
87+
collection = Collection(name=collection_name)
88+
89+
# Ensure collection is loaded into memory
90+
collection.load()
91+
92+
# Vectorize the query text
93+
query_embedding = model.encode([request.query_text])[0]
94+
95+
# Define search parameters
96+
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
97+
98+
# Perform search
99+
try:
100+
results = collection.search(
101+
data=[query_embedding],
102+
anns_field="embedding",
103+
param=search_params,
104+
limit=request.top_k,
105+
output_fields=["text_chunk"]
106+
)
107+
except Exception as e:
108+
raise HTTPException(status_code=500, detail=f"Search failed: {e}")
109+
110+
# Process results
111+
top_matches = [
112+
QueryResponse(id=result.id, text_chunk=result.entity.get("text_chunk"), score=result.distance)
113+
for result in results[0] # results[0] because search returns a list of lists
114+
]
115+
return top_matches
116+
117+
@app.post("/query/accounts", response_model=list[QueryResponse])
118+
def query_milvus_api(request: QueryRequest):
119+
"""
120+
Query the Milvus index and return the top K matches.
121+
122+
Parameters:
123+
- query_text (str): The query string to search for.
124+
- top_k (int): The number of top matches to return (default is 3).
125+
126+
Returns:
127+
- List of top matches as JSON.
128+
"""
129+
COLLECTION_NAME = "customer_accounts"
130+
131+
collection_name = COLLECTION_NAME
132+
collection = Collection(name=collection_name)
133+
134+
# Ensure collection is loaded into memory
46135
collection.load()
47136

48137
# Vectorize the query text

rag/rag_tool_service.yaml

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: rag-app
5+
spec:
6+
replicas: 1
7+
selector:
8+
matchLabels:
9+
app: rag-app
10+
template:
11+
metadata:
12+
labels:
13+
app: rag-app
14+
spec:
15+
containers:
16+
- name: rag-app-container
17+
image: quay.io/shanand/kubecon-agent-demo-rag:v0.0.2
18+
ports:
19+
- containerPort: 8000
20+
env:
21+
- name: MILVUS_HOST
22+
value: "HOST"
23+
- name: COLLECTION_NAME
24+
value: "product_details"
25+
resources:
26+
limits:
27+
memory: "16Gi"
28+
cpu: "4"
29+
requests:
30+
memory: "4Gi"
31+
cpu: "2"
32+
---
33+
apiVersion: v1
34+
kind: Service
35+
metadata:
36+
name: rag-app-service
37+
spec:
38+
selector:
39+
app: rag-app
40+
ports:
41+
- protocol: TCP
42+
port: 80
43+
targetPort: 8000
44+
type: LoadBalancer
45+
---
46+
apiVersion: route.openshift.io/v1
47+
kind: Route
48+
metadata:
49+
name: rag-app-route
50+
spec:
51+
to:
52+
kind: Service
53+
name: rag-app-service
54+
port:
55+
targetPort: 80

rag/test_rag_endpoint.ipynb

+47-21
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,16 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 20,
12+
"execution_count": 1,
1313
"metadata": {},
14-
"outputs": [
15-
{
16-
"name": "stdout",
17-
"output_type": "stream",
18-
"text": [
19-
"Test Passed: Received response from /query\n",
20-
"Response JSON: [{'id': 453547963383558110, 'text_chunk': '<hr />\\n<h1><strong>CloudForge Dynamics</strong></h1>\\n<p><strong>Innovating Cloud Solutions for the Next Generation of Enterprises</strong></p>\\n<hr />\\n<h2><strong>Vision</strong></h2>\\n<p>At <strong>CloudForge Dynamics</strong>, our vision is to <strong>empower businesses globally by providing cutting-edge cloud solutions that drive innovation, scalability, and efficiency</strong>. We strive to be at the forefront of cloud technology, enabling organizations to seamlessly transition into the cloud era while optimizing their operations and unlocking new growth opportunities.</p>\\n<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>', 'score': 0.9139557480812073}, {'id': 453547963383558246, 'text_chunk': '<hr />\\n<h1><strong>CloudForge Dynamics</strong></h1>\\n<p><strong>Innovating Cloud Solutions for the Next Generation of Enterprises</strong></p>\\n<hr />\\n<h2><strong>Vision</strong></h2>\\n<p>At <strong>CloudForge Dynamics</strong>, our vision is to <strong>empower businesses globally by providing cutting-edge cloud solutions that drive innovation, scalability, and efficiency</strong>. We strive to be at the forefront of cloud technology, enabling organizations to seamlessly transition into the cloud era while optimizing their operations and unlocking new growth opportunities.</p>\\n<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>', 'score': 0.9139557480812073}, {'id': 453547963383558111, 'text_chunk': '<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>\\n<p>CloudForge Dynamics is a leading technology company specializing in cloud migration services, Kubernetes orchestration, and AI-powered cloud solutions. Our mission is to simplify and accelerate the adoption of cloud technologies for businesses of all sizes. 
We offer a comprehensive suite of products and services designed to address the complex challenges of cloud integration, infrastructure management, and application modernization.</p>\\n<h3><strong>Market Analysis</strong></h3>\\n<ul>\\n<li><strong>Industry Growth</strong>: The global cloud computing market is expected to reach $832.1 billion by 2025, driven by increased adoption of cloud services.</li>\\n<li><strong>Target Markets</strong>:</li>\\n<li><strong>Small and Medium-sized Enterprises (SMEs)</strong> seeking affordable cloud migration solutions.</li>', 'score': 0.9216384291648865}]\n"
21-
]
22-
}
23-
],
14+
"outputs": [],
2415
"source": [
2516
"import requests\n",
2617
"\n",
27-
"# Define the base URL of the FastAPI app\n",
28-
"BASE_URL = \"<URL>\" # Update if using a different host or port\n",
29-
"\n",
30-
"def test_query_endpoint():\n",
18+
"def test_query_endpoint(BASE_URL, query):\n",
3119
" # Define the test input for the query\n",
3220
" payload = {\n",
33-
" \"query_text\": \"What are the major products at CloudForge?\",\n",
21+
" \"query_text\": query,\n",
3422
" \"top_k\": 3\n",
3523
" }\n",
3624
"\n",
@@ -46,11 +34,49 @@
4634
" # Print out an error message if the request failed\n",
4735
" print(\"Test Failed: Could not reach /query endpoint\")\n",
4836
" print(\"Status Code:\", response.status_code)\n",
49-
" print(\"Response Text:\", response.text)\n",
50-
"\n",
51-
"# Run the test\n",
52-
"if __name__ == \"__main__\":\n",
53-
" test_query_endpoint()\n"
37+
" print(\"Response Text:\", response.text)\n"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": 4,
43+
"metadata": {},
44+
"outputs": [
45+
{
46+
"name": "stdout",
47+
"output_type": "stream",
48+
"text": [
49+
"Test Passed: Received response from /query\n",
50+
"Response JSON: [{'id': 453726272212697329, 'text_chunk': '<p><strong>Q1: How does AI Optimizer learn and adapt to my applications?</strong></p>\\n<p><strong>A:</strong> The AI Optimizer collects data on application performance, resource utilization, and user demand over time. It employs machine learning models to identify patterns and trends, allowing it to predict future needs and make proactive adjustments.</p>\\n<p><strong>Q2: What kinds of optimizations can AI Optimizer perform automatically?</strong></p>', 'score': 150.1814422607422}, {'id': 453726272212697324, 'text_chunk': '<h2><strong>3. AI Optimizer</strong></h2>\\n<h3><strong>Detailed Description</strong></h3>\\n<p><strong>AI Optimizer</strong> is an intelligent performance tuning solution that leverages artificial intelligence and machine learning to optimize cloud application performance. It continuously monitors applications and infrastructure, learns from usage patterns, and makes real-time adjustments to resource allocations and configurations to enhance efficiency and reduce costs.</p>', 'score': 158.7060546875}, {'id': 453726272212697330, 'text_chunk': '<p><strong>Q2: What kinds of optimizations can AI Optimizer perform automatically?</strong></p>\\n<p><strong>A:</strong> It can adjust resource allocations (CPU, memory), scale instances up or down, redistribute workloads, and modify configurations to enhance performance and efficiency without manual intervention.</p>\\n<p><strong>Q3: How does AI Optimizer help reduce cloud costs?</strong></p>', 'score': 170.96548461914062}]\n"
51+
]
52+
}
53+
],
54+
"source": [
55+
"BASE_URL = \"http://host.com\"\n",
56+
"query = \"AI optimizer?\"\n",
57+
"test_query_endpoint(BASE_URL, query)"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": null,
63+
"metadata": {},
64+
"outputs": [],
65+
"source": [
66+
"BASE_URL = \"http://host.com\"\n",
67+
"query = \"How do I onboard the first day?\"\n",
68+
"test_query_endpoint(BASE_URL, query)"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": null,
74+
"metadata": {},
75+
"outputs": [],
76+
"source": [
77+
"BASE_URL = \"http://host.com\"\n",
78+
"query = \"What is the revenue of Finova bank?\"\n",
79+
"test_query_endpoint(BASE_URL, query)"
5480
]
5581
}
5682
],

rag/vector_db/standalone_milvus.yaml

+25-14
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,16 @@ spec:
2525
value: "/milvus/configs/embedEtcd.yaml"
2626
- name: COMMON_STORAGETYPE
2727
value: "local"
28+
resources:
29+
limits:
30+
memory: "16Gi"
31+
cpu: "4"
32+
requests:
33+
memory: "4Gi"
34+
cpu: "2"
2835
ports:
2936
- containerPort: 19530
30-
- containerPort: 9091
37+
- containerPort: 9093
3138
- containerPort: 2379
3239
volumeMounts:
3340
- name: milvus-volume
@@ -39,17 +46,13 @@ spec:
3946
allowPrivilegeEscalation: false
4047
capabilities:
4148
drop: ["ALL"]
42-
livenessProbe:
43-
httpGet:
44-
path: /healthz
45-
port: 9091
46-
initialDelaySeconds: 90
47-
periodSeconds: 30
48-
timeoutSeconds: 20
49-
failureThreshold: 3
5049
volumes:
50+
# - name: milvus-volume
51+
# emptyDir: {}
5152
- name: milvus-volume
52-
emptyDir: {}
53+
persistentVolumeClaim:
54+
claimName: milvus-data-pvc
55+
5356
- name: config-embedetcd
5457
configMap:
5558
name: embedetcd-config
@@ -79,10 +82,6 @@ spec:
7982
protocol: TCP
8083
port: 19530
8184
targetPort: 19530
82-
- name: http
83-
protocol: TCP
84-
port: 9091
85-
targetPort: 9091
8685
- name: etcd
8786
protocol: TCP
8887
port: 2379
@@ -100,3 +99,15 @@ spec:
10099
name: milvus-service
101100
port:
102101
targetPort: http
102+
103+
---
104+
apiVersion: v1
105+
kind: PersistentVolumeClaim
106+
metadata:
107+
name: milvus-data-pvc
108+
spec:
109+
accessModes:
110+
- ReadWriteOnce
111+
resources:
112+
requests:
113+
storage: 20Gi

0 commit comments

Comments
 (0)