---
title: 'Qdrant Vector Search Tool'
description: 'Semantic search capabilities for CrewAI agents using Qdrant vector database'
icon: magnifying-glass-plus
---

# `QdrantVectorSearchTool`

The Qdrant Vector Search Tool enables semantic search capabilities in your CrewAI agents by leveraging [Qdrant](https://qdrant.tech/), a vector similarity search engine. This tool allows your agents to search through documents stored in a Qdrant collection using semantic similarity.

## Installation

Install the required packages:

```bash
uv pip install 'crewai[tools]' qdrant-client
```

## Basic Usage

Here's a minimal example of how to use the tool:

```python
from crewai import Agent
from crewai_tools import QdrantVectorSearchTool

# Initialize the tool
qdrant_tool = QdrantVectorSearchTool(
    qdrant_url="your_qdrant_url",
    qdrant_api_key="your_qdrant_api_key",
    collection_name="your_collection"
)

# Create an agent that uses the tool
agent = Agent(
    role="Research Assistant",
    goal="Find relevant information in documents",
    tools=[qdrant_tool]
)

# The tool will automatically use OpenAI embeddings
# and return the 3 most relevant results with scores > 0.35
```

## Complete Working Example

Here's a complete example showing how to:
1. Extract text from a PDF
2. Generate embeddings using OpenAI
3. Store in Qdrant
4. Create a CrewAI agentic RAG workflow for semantic search

```python
import os
import uuid
import pdfplumber
from openai import OpenAI
from dotenv import load_dotenv
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import QdrantVectorSearchTool
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, Distance, VectorParams

# Load environment variables
load_dotenv()

# Initialize OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    text = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text.append(page_text.strip())
    return text

# Generate OpenAI embeddings
def get_openai_embedding(text):
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

# Store text and embeddings in Qdrant
def load_pdf_to_qdrant(pdf_path, qdrant, collection_name):
    # Extract text from PDF
    text_chunks = extract_text_from_pdf(pdf_path)

    # Create Qdrant collection
    if qdrant.collection_exists(collection_name):
        qdrant.delete_collection(collection_name)
    qdrant.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE)
    )

    # Store embeddings
    points = []
    for chunk in text_chunks:
        embedding = get_openai_embedding(chunk)
        points.append(PointStruct(
            id=str(uuid.uuid4()),
            vector=embedding,
            payload={"text": chunk}
        ))
    qdrant.upsert(collection_name=collection_name, points=points)

# Initialize Qdrant client and load data
qdrant = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY")
)
collection_name = "example_collection"
pdf_path = "path/to/your/document.pdf"
load_pdf_to_qdrant(pdf_path, qdrant, collection_name)

# Initialize Qdrant search tool
qdrant_tool = QdrantVectorSearchTool(
    qdrant_url=os.getenv("QDRANT_URL"),
    qdrant_api_key=os.getenv("QDRANT_API_KEY"),
    collection_name=collection_name,
    limit=3,
    score_threshold=0.35
)

# Create CrewAI agents
search_agent = Agent(
    role="Senior Semantic Search Agent",
    goal="Find and analyze documents based on semantic search",
    backstory="""You are an expert research assistant who can find relevant
    information using semantic search in a Qdrant database.""",
    tools=[qdrant_tool],
    verbose=True
)

answer_agent = Agent(
    role="Senior Answer Assistant",
    goal="Generate answers to questions based on the context provided",
    backstory="""You are an expert answer assistant who can generate
    answers to questions based on the context provided.""",
    tools=[qdrant_tool],
    verbose=True
)

# Define tasks
search_task = Task(
    description="""Search for relevant documents about the {query}.
    Your final answer should include:
    - The relevant information found
    - The similarity scores of the results
    - The metadata of the relevant documents""",
    expected_output="A summary of the relevant documents found, including similarity scores and metadata",
    agent=search_agent
)

answer_task = Task(
    description="""Given the context and metadata of relevant documents,
    generate a final answer based on the context.""",
    expected_output="A concise answer to the query, grounded in the retrieved context",
    agent=answer_agent
)

# Run CrewAI workflow
crew = Crew(
    agents=[search_agent, answer_agent],
    tasks=[search_task, answer_task],
    process=Process.sequential,
    verbose=True
)

result = crew.kickoff(
    inputs={"query": "What is the role of X in the document?"}
)
print(result)
```

## Tool Parameters

### Required Parameters
- `qdrant_url` (str): The URL of your Qdrant server
- `qdrant_api_key` (str): API key for authentication with Qdrant
- `collection_name` (str): Name of the Qdrant collection to search

### Optional Parameters
- `limit` (int): Maximum number of results to return (default: 3)
- `score_threshold` (float): Minimum similarity score threshold (default: 0.35)
- `custom_embedding_fn` (Callable[[str], list[float]]): Custom function for text vectorization
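
For instance, here is a minimal sketch of tuning the optional parameters; the URL, API key, and collection name are placeholders:

```python
from crewai_tools import QdrantVectorSearchTool

# Placeholder credentials; tune limit and score_threshold for your use case
qdrant_tool = QdrantVectorSearchTool(
    qdrant_url="https://your-qdrant-instance:6333",
    qdrant_api_key="your_api_key",
    collection_name="your_collection",
    limit=5,              # return up to 5 results instead of the default 3
    score_threshold=0.5,  # keep only matches scoring above 0.5
)
```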

## Search Parameters

The tool accepts these parameters in its schema:
- `query` (str): The search query to find similar documents
- `filter_by` (str, optional): Metadata field to filter on
- `filter_value` (str, optional): Value to filter by
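
Agents fill these in when they call the tool, but you can also exercise them directly. A quick sketch, assuming the standard `run` method on CrewAI tools and a hypothetical `source` metadata field:

```python
# Direct invocation for testing; the metadata field and value are illustrative
results = qdrant_tool.run(
    query="What is the role of X in the document?",
    filter_by="source",         # hypothetical metadata field
    filter_value="report.pdf",  # hypothetical metadata value
)
print(results)
```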

## Return Format

The tool returns results in JSON format:

```json
[
  {
    "metadata": {
      // Any metadata stored with the document
    },
    "context": "The actual text content of the document",
    "distance": 0.95 // Similarity score
  }
]
```
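
Because the results come back as a JSON string, you can parse them for post-processing. A small sketch, assuming the `qdrant_tool` instance from the examples above and the standard `run` method on CrewAI tools:

```python
import json

# Hypothetical direct call for illustration; agents normally invoke the tool themselves
raw = qdrant_tool.run(query="What is the role of X in the document?")

# Parse the JSON string and inspect each hit
for hit in json.loads(raw):
    print(f"score={hit['distance']:.2f} | text={hit['context'][:80]}")
    print(f"metadata: {hit['metadata']}")
```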

## Default Embedding

By default, the tool uses OpenAI's `text-embedding-3-small` model for vectorization. This requires:
- OpenAI API key set in environment: `OPENAI_API_KEY`

## Custom Embeddings

Instead of using the default embedding model, you might want to use your own embedding function in cases where you:

1. Want to use a different embedding model (e.g., Cohere, HuggingFace, Ollama models)
2. Need to reduce costs by using open-source embedding models
3. Have specific requirements for vector dimensions or embedding quality
4. Want to use domain-specific embeddings (e.g., for medical or legal text)

Here's an example using a HuggingFace model:

```python
from transformers import AutoTokenizer, AutoModel
import torch

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

def custom_embeddings(text: str) -> list[float]:
    # Tokenize and run the model; no gradient tracking is needed for inference
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)

    # Use mean pooling to get text embedding
    embeddings = outputs.last_hidden_state.mean(dim=1)

    # Convert to list of floats and return
    return embeddings[0].tolist()

# Use custom embeddings with the tool
tool = QdrantVectorSearchTool(
    qdrant_url="your_url",
    qdrant_api_key="your_key",
    collection_name="your_collection",
    custom_embedding_fn=custom_embeddings  # Pass your custom function
)
```
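
Note that your Qdrant collection's vector size must match the dimensionality of your embedding model: `sentence-transformers/all-MiniLM-L6-v2` produces 384-dimensional vectors, whereas the collection in the complete example above was created with `size=1536` for OpenAI's `text-embedding-3-small`.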

## Error Handling

The tool handles these specific errors:
- Raises `ImportError` if `qdrant-client` is not installed, with an option to install it automatically via `uv add qdrant-client`
- Raises `ValueError` if `QDRANT_URL` is not set
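
A rough sketch of guarding initialization against these errors; the handling shown is illustrative, not prescribed by the tool:

```python
import os

from crewai_tools import QdrantVectorSearchTool

try:
    qdrant_tool = QdrantVectorSearchTool(
        qdrant_url=os.getenv("QDRANT_URL"),
        qdrant_api_key=os.getenv("QDRANT_API_KEY"),
        collection_name="example_collection",
    )
except ImportError:
    # qdrant-client is missing; install it, e.g. `uv add qdrant-client`
    raise
except ValueError as exc:
    # Typically means no Qdrant URL was provided via argument or QDRANT_URL
    print(f"Qdrant configuration error: {exc}")
```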

## Environment Variables

Required environment variables:
```bash
export QDRANT_URL="your_qdrant_url"  # If not provided in constructor
export QDRANT_API_KEY="your_api_key"  # If not provided in constructor
export OPENAI_API_KEY="your_openai_key"  # If using default embeddings
```