Make the service more robust

Shreyanand · Shreyanand · commit 6e68b107927d · 2024-11-06T16:29:28.000-05:00
Signed-off-by: Shreyanand <shanand@redhat.com>
diff --git a/config.yaml b/config.yaml
@@ -1,7 +1,7 @@
 tools:
   - name: "product_assistant"
     description: "Answers questions related to CloudForge products: CloudForge Migrate, Secure, AI Optimizer, DevOps Accelerator, Kubernetes Orchestrator Pro"
-    url: "<HOST>"
+    url: "http://host/query/product"
     config:
       method: 'POST'
       headers:
@@ -15,10 +15,10 @@ tools:
         json:
           - "response"
       examples:
-        - "What is the CloudForge Migrate product?"
+        - "What does the CloudForge Migrate product do?"
   - name: "HR_assistant"
     description: "Answers questions related to HR, employment, onboarding, culture, policies, workplace, IT, and security"
-    url: "<HOST>"
+    url: "http://host/query/hr"
     config:
       method: 'POST'
       headers:
@@ -32,10 +32,10 @@ tools:
         json:
           - "response"
       examples:
-        - "What should I do before the start date?"
+        - "What HR things should I do before the start date?"
   - name: "accounts_assistant"
     description: "Answers questions related to statements and revenue of customer accounts: FinNova Bank, MediCore Systems, TechWave Solutions"
-    url: "<HOST>"
+    url: "http://host/query/accounts"
     config:
       method: 'POST'
       headers:
@@ -49,4 +49,4 @@ tools:
         json:
           - "response"
       examples:
-        - "What is the revenue from FinNova Bank?"
+        - "Total Payments Received from FinNova Bank?"
diff --git a/rag/app.py b/rag/app.py
@@ -12,13 +12,6 @@
 MILVUS_PORT = "19530"           # Default gRPC port
 connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)
 model = SentenceTransformer("WhereIsAI/UAE-Large-V1")
-COLLECTION_NAME = os.getenv("COLLECTION_NAME", "product_details")
-
-collection_name = COLLECTION_NAME
-collection = Collection(name=collection_name)
-
-# Ensure collection is loaded into memory
-collection.load()
 
 # Define request and response models
 class QueryRequest(BaseModel):
@@ -30,7 +23,53 @@ class QueryResponse(BaseModel):
     text_chunk: str
     score: float
 
-@app.post("/query", response_model=list[QueryResponse])
+@app.post("/query/product", response_model=list[QueryResponse])
+def query_milvus_api(request: QueryRequest):
+    """
+    Query the Milvus index and return the top K matches.
+
+    Parameters:
+    - query_text (str): The query string to search for.
+    - top_k (int): The number of top matches to return (default is 3).
+
+    Returns:
+    - List of top matches as JSON.
+    """
+    COLLECTION_NAME = "product_details"
+
+    collection_name = COLLECTION_NAME
+    collection = Collection(name=collection_name)
+
+    # Ensure collection is loaded into memory
+    collection.load()
+
+    # Vectorize the query text
+    query_embedding = model.encode([request.query_text])[0]
+
+    # Define search parameters
+    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
+    
+    # Perform search
+    try:
+        results = collection.search(
+            data=[query_embedding],
+            anns_field="embedding",
+            param=search_params,
+            limit=request.top_k,
+            output_fields=["text_chunk"]
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Search failed: {e}")
+
+    # Process results
+    top_matches = [
+        QueryResponse(id=result.id, text_chunk=result.entity.get("text_chunk"), score=result.distance)
+        for result in results[0]  # results[0] because search returns a list of lists
+    ]
+    return top_matches
+
+
+@app.post("/query/hr", response_model=list[QueryResponse])
 def query_milvus_api(request: QueryRequest):
     """
     Query the Milvus index and return the top K matches.
@@ -42,7 +81,57 @@ def query_milvus_api(request: QueryRequest):
     Returns:
     - List of top matches as JSON.
     """
-    # Check if the collection is loaded (Milvus requirement)
+    COLLECTION_NAME = "HR_policies"
+
+    collection_name = COLLECTION_NAME
+    collection = Collection(name=collection_name)
+
+    # Ensure collection is loaded into memory
+    collection.load()
+
+    # Vectorize the query text
+    query_embedding = model.encode([request.query_text])[0]
+
+    # Define search parameters
+    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
+    
+    # Perform search
+    try:
+        results = collection.search(
+            data=[query_embedding],
+            anns_field="embedding",
+            param=search_params,
+            limit=request.top_k,
+            output_fields=["text_chunk"]
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Search failed: {e}")
+
+    # Process results
+    top_matches = [
+        QueryResponse(id=result.id, text_chunk=result.entity.get("text_chunk"), score=result.distance)
+        for result in results[0]  # results[0] because search returns a list of lists
+    ]
+    return top_matches
+
+@app.post("/query/accounts", response_model=list[QueryResponse])
+def query_milvus_api(request: QueryRequest):
+    """
+    Query the Milvus index and return the top K matches.
+
+    Parameters:
+    - query_text (str): The query string to search for.
+    - top_k (int): The number of top matches to return (default is 3).
+
+    Returns:
+    - List of top matches as JSON.
+    """
+    COLLECTION_NAME = "customer_accounts"
+
+    collection_name = COLLECTION_NAME
+    collection = Collection(name=collection_name)
+
+    # Ensure collection is loaded into memory
     collection.load()
 
     # Vectorize the query text
diff --git a/rag/rag_tool_service.yaml b/rag/rag_tool_service.yaml
@@ -0,0 +1,55 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: rag-app
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: rag-app
+  template:
+    metadata:
+      labels:
+        app: rag-app
+    spec:
+      containers:
+        - name: rag-app-container
+          image: quay.io/shanand/kubecon-agent-demo-rag:v0.0.2
+          ports:
+            - containerPort: 8000
+          env:
+            - name: MILVUS_HOST
+              value: "HOST"
+            - name: COLLECTION_NAME
+              value: "product_details"
+          resources:
+            limits:
+              memory: "16Gi"
+              cpu: "4"
+            requests:
+              memory: "4Gi"
+              cpu: "2"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: rag-app-service
+spec:
+  selector:
+    app: rag-app
+  ports:
+    - protocol: TCP
+      port: 80
+      targetPort: 8000
+  type: LoadBalancer
+---
+apiVersion: route.openshift.io/v1
+kind: Route
+metadata:
+  name: rag-app-route
+spec:
+  to:
+    kind: Service
+    name: rag-app-service
+  port:
+    targetPort: 8000
diff --git a/rag/test_rag_endpoint.ipynb b/rag/test_rag_endpoint.ipynb
@@ -9,28 +9,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Test Passed: Received response from /query\n",
-      "Response JSON: [{'id': 453547963383558110, 'text_chunk': '<hr />\\n<h1><strong>CloudForge Dynamics</strong></h1>\\n<p><strong>Innovating Cloud Solutions for the Next Generation of Enterprises</strong></p>\\n<hr />\\n<h2><strong>Vision</strong></h2>\\n<p>At <strong>CloudForge Dynamics</strong>, our vision is to <strong>empower businesses globally by providing cutting-edge cloud solutions that drive innovation, scalability, and efficiency</strong>. We strive to be at the forefront of cloud technology, enabling organizations to seamlessly transition into the cloud era while optimizing their operations and unlocking new growth opportunities.</p>\\n<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>', 'score': 0.9139557480812073}, {'id': 453547963383558246, 'text_chunk': '<hr />\\n<h1><strong>CloudForge Dynamics</strong></h1>\\n<p><strong>Innovating Cloud Solutions for the Next Generation of Enterprises</strong></p>\\n<hr />\\n<h2><strong>Vision</strong></h2>\\n<p>At <strong>CloudForge Dynamics</strong>, our vision is to <strong>empower businesses globally by providing cutting-edge cloud solutions that drive innovation, scalability, and efficiency</strong>. We strive to be at the forefront of cloud technology, enabling organizations to seamlessly transition into the cloud era while optimizing their operations and unlocking new growth opportunities.</p>\\n<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>', 'score': 0.9139557480812073}, {'id': 453547963383558111, 'text_chunk': '<hr />\\n<h2><strong>Business Plan</strong></h2>\\n<h3><strong>Executive Summary</strong></h3>\\n<p>CloudForge Dynamics is a leading technology company specializing in cloud migration services, Kubernetes orchestration, and AI-powered cloud solutions. Our mission is to simplify and accelerate the adoption of cloud technologies for businesses of all sizes. 
We offer a comprehensive suite of products and services designed to address the complex challenges of cloud integration, infrastructure management, and application modernization.</p>\\n<h3><strong>Market Analysis</strong></h3>\\n<ul>\\n<li><strong>Industry Growth</strong>: The global cloud computing market is expected to reach $832.1 billion by 2025, driven by increased adoption of cloud services.</li>\\n<li><strong>Target Markets</strong>:</li>\\n<li><strong>Small and Medium-sized Enterprises (SMEs)</strong> seeking affordable cloud migration solutions.</li>', 'score': 0.9216384291648865}]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import requests\n",
     "\n",
-    "# Define the base URL of the FastAPI app\n",
-    "BASE_URL = \"<URL>\"  # Update if using a different host or port\n",
-    "\n",
-    "def test_query_endpoint():\n",
+    "def test_query_endpoint(BASE_URL, query):\n",
     "    # Define the test input for the query\n",
     "    payload = {\n",
-    "        \"query_text\": \"What are the major products at CloudForge?\",\n",
+    "        \"query_text\": query,\n",
     "        \"top_k\": 3\n",
     "    }\n",
     "\n",
@@ -46,11 +34,49 @@
     "        # Print out an error message if the request failed\n",
     "        print(\"Test Failed: Could not reach /query endpoint\")\n",
     "        print(\"Status Code:\", response.status_code)\n",
-    "        print(\"Response Text:\", response.text)\n",
-    "\n",
-    "# Run the test\n",
-    "if __name__ == \"__main__\":\n",
-    "    test_query_endpoint()\n"
+    "        print(\"Response Text:\", response.text)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test Passed: Received response from /query\n",
+      "Response JSON: [{'id': 453726272212697329, 'text_chunk': '<p><strong>Q1: How does AI Optimizer learn and adapt to my applications?</strong></p>\\n<p><strong>A:</strong> The AI Optimizer collects data on application performance, resource utilization, and user demand over time. It employs machine learning models to identify patterns and trends, allowing it to predict future needs and make proactive adjustments.</p>\\n<p><strong>Q2: What kinds of optimizations can AI Optimizer perform automatically?</strong></p>', 'score': 150.1814422607422}, {'id': 453726272212697324, 'text_chunk': '<h2><strong>3. AI Optimizer</strong></h2>\\n<h3><strong>Detailed Description</strong></h3>\\n<p><strong>AI Optimizer</strong> is an intelligent performance tuning solution that leverages artificial intelligence and machine learning to optimize cloud application performance. It continuously monitors applications and infrastructure, learns from usage patterns, and makes real-time adjustments to resource allocations and configurations to enhance efficiency and reduce costs.</p>', 'score': 158.7060546875}, {'id': 453726272212697330, 'text_chunk': '<p><strong>Q2: What kinds of optimizations can AI Optimizer perform automatically?</strong></p>\\n<p><strong>A:</strong> It can adjust resource allocations (CPU, memory), scale instances up or down, redistribute workloads, and modify configurations to enhance performance and efficiency without manual intervention.</p>\\n<p><strong>Q3: How does AI Optimizer help reduce cloud costs?</strong></p>', 'score': 170.96548461914062}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "BASE_URL = \"http://host.com\"\n",
+    "query = \"AI optimizer?\"\n",
+    "test_query_endpoint(BASE_URL, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "BASE_URL = \"http://host.com\"\n",
+    "query = \"How do I onboard the first day?\"\n",
+    "test_query_endpoint(BASE_URL, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "BASE_URL = \"http://host.com\"\n",
+    "query = \"What is the revenue of Finova bank?\"\n",
+    "test_query_endpoint(BASE_URL, query)"
    ]
   }
  ],
diff --git a/rag/vector_db/standalone_milvus.yaml b/rag/vector_db/standalone_milvus.yaml
@@ -25,9 +25,16 @@ spec:
               value: "/milvus/configs/embedEtcd.yaml"
             - name: COMMON_STORAGETYPE
               value: "local"
+          resources:
+            limits:
+              memory: "16Gi"
+              cpu: "4"
+            requests:
+              memory: "4Gi"
+              cpu: "2"
           ports:
             - containerPort: 19530
-            - containerPort: 9091
+            - containerPort: 9093
             - containerPort: 2379
           volumeMounts:
             - name: milvus-volume
@@ -39,17 +46,13 @@ spec:
             allowPrivilegeEscalation: false
             capabilities:
               drop: ["ALL"]
-          livenessProbe:
-            httpGet:
-              path: /healthz
-              port: 9091
-            initialDelaySeconds: 90
-            periodSeconds: 30
-            timeoutSeconds: 20
-            failureThreshold: 3
       volumes:
+        # - name: milvus-volume
+        #   emptyDir: {}
         - name: milvus-volume
-          emptyDir: {}
+          persistentVolumeClaim:
+            claimName: milvus-data-pvc
+
         - name: config-embedetcd
           configMap:
             name: embedetcd-config
@@ -79,10 +82,6 @@ spec:
       protocol: TCP
       port: 19530
       targetPort: 19530
-    - name: http
-      protocol: TCP
-      port: 9091
-      targetPort: 9091
     - name: etcd
       protocol: TCP
       port: 2379
@@ -100,3 +99,15 @@ spec:
     name: milvus-service
   port:
     targetPort: http
+
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: milvus-data-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
diff --git a/rag/vector_db/vector_db_insert.ipynb b/rag/vector_db/vector_db_insert.ipynb