1
+ import torch
2
+ from PIL import Image
3
+ import os
4
+ from transformers import CLIPProcessor , CLIPModel
5
+ import numpy as np
6
+ import faiss
7
+ import pickle
8
+ import time
9
+
10
class ClipEncoder:
    """Text-to-image and image-to-image similarity search over a local folder.

    Images are embedded with CLIP (``openai/clip-vit-base-patch32``),
    L2-normalized, and stored in an inner-product FAISS index, so the scores
    returned by :meth:`search` / :meth:`search_by_image` are cosine
    similarities in [-1, 1].
    """

    def __init__(self):
        # Prefer GPU when available; CLIP inference is much faster on CUDA.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: { self.device } ")

        # Load CLIP model
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(self.device)
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

        # Initialize FAISS index
        self.dimension = 512  # CLIP ViT-B/32 embedding dimension
        # Inner product on unit-length vectors == cosine similarity.
        self.index = faiss.IndexFlatIP(self.dimension)

        # Row i of the FAISS index corresponds to self.image_paths[i].
        self.image_paths = []

    @staticmethod
    def _normalize(embeddings):
        """L2-normalize each row of a 2-D array.

        Zero-norm rows are divided by 1 instead of 0 so a degenerate
        embedding yields a zero vector rather than NaNs that would silently
        corrupt every subsequent similarity score.
        """
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        return embeddings / norms

    def _embed_image(self, image_path):
        """Return a (1, dim) L2-normalized CLIP embedding for one image file.

        Raises whatever PIL/torch raise for unreadable files; callers decide
        whether to skip (bulk indexing) or propagate (single query).
        """
        image = Image.open(image_path).convert('RGB')
        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
        with torch.no_grad():
            image_features = self.model.get_image_features(**inputs)
        return self._normalize(image_features.cpu().numpy())

    def encode_images(self, image_dir):
        """Encode all images in the directory and build the FAISS index"""
        start_time = time.time()
        print(f"Starting to encode images from { image_dir } ...")

        # Sort so index row order is reproducible across platforms
        # (os.listdir order is arbitrary).
        image_paths = sorted(
            os.path.join(image_dir, f)
            for f in os.listdir(image_dir)
            if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

        all_embeddings = []

        for img_path in image_paths:
            try:
                all_embeddings.append(self._embed_image(img_path)[0])
                self.image_paths.append(img_path)
            except Exception as e:
                # Best-effort bulk indexing: skip unreadable/corrupt files.
                print(f"Error processing { img_path } : { e } ")

        # Add all embeddings to the FAISS index in one batched call.
        if all_embeddings:
            all_embeddings_array = np.array(all_embeddings).astype('float32')
            self.index.add(all_embeddings_array)
            print(f"Added { len (all_embeddings )} images to the index")
        else:
            print("No images were successfully encoded")

        elapsed_time = time.time() - start_time
        print(f"Encoding completed in { elapsed_time :.2f} seconds")

    def encode_text(self, text):
        """Encode text query using CLIP"""
        inputs = self.processor(text=text, return_tensors="pt", padding=True).to(self.device)

        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)

        # Normalize so inner-product search equals cosine similarity.
        return self._normalize(text_features.cpu().numpy())

    def _format_results(self, scores, indices):
        """Map raw FAISS (scores, indices) output to result dicts.

        FAISS pads with index -1 when fewer than k vectors exist; those
        entries are dropped.
        """
        results = []
        for idx, score in zip(indices[0], scores[0]):
            if idx != -1:  # Valid index
                results.append({
                    'image_path': self.image_paths[idx],
                    'score': float(score),
                    'filename': os.path.basename(self.image_paths[idx])
                })
        return results

    def search(self, query, k=8):
        """Search for similar images using text query"""
        text_embedding = self.encode_text(query)
        scores, indices = self.index.search(text_embedding.astype('float32'), k)
        return self._format_results(scores, indices)

    def encode_query_image(self, image_path):
        """Encode query image using CLIP"""
        return self._embed_image(image_path)

    def search_by_image(self, image_path, k=8):
        """Search for similar images using an image query"""
        image_embedding = self.encode_query_image(image_path)
        scores, indices = self.index.search(image_embedding.astype('float32'), k)
        return self._format_results(scores, indices)

    def save_index(self, filename="clip_search_index.pkl"):
        """Save the index and image paths to a file"""
        with open(filename, 'wb') as f:
            pickle.dump({
                # serialize_index returns a numpy uint8 array that pickles safely.
                'index': faiss.serialize_index(self.index),
                'image_paths': self.image_paths
            }, f)
        print(f"Index saved to { filename } ")

    def load_index(self, filename="clip_search_index.pkl"):
        """Load the index and image paths from a file.

        SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        file — only load index files you created yourself, never ones from
        untrusted sources.
        """
        with open(filename, 'rb') as f:
            data = pickle.load(f)
            self.index = faiss.deserialize_index(data['index'])
            self.image_paths = data['image_paths']
            print(f"Loaded index with { len (self .image_paths )} images")
0 commit comments