diff --git a/models.py b/models.py
new file mode 100644
index 0000000..0d67fd6
--- /dev/null
+++ b/models.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""
+Data models for the Cursor View application.
+"""
+
+from dataclasses import dataclass, field
+from typing import Dict, List, Any, Optional, Union
+from datetime import datetime
+
+
+@dataclass
+class Message:
+    """A message in a chat session."""
+    role: str  # 'user' or 'assistant'
+    content: str
+    timestamp: Optional[float] = None  # Unix timestamp
+
+
+@dataclass
+class Project:
+    """Project metadata."""
+    name: str
+    root_path: str
+    workspace_id: Optional[str] = None
+
+
+@dataclass
+class ChatSession:
+    """A chat session with associated metadata."""
+    session_id: str
+    project: Project
+    messages: List[Message]
+    date: Optional[float] = None  # Unix timestamp of the session
+    db_path: Optional[str] = None  # Path to the database file
+    workspace_id: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the chat session to a dictionary."""
+        return {
+            "session_id": self.session_id,
+            "project": {
+                "name": self.project.name,
+                "rootPath": self.project.root_path,
+                "workspace_id": self.project.workspace_id
+            },
+            "messages": [
+                {
+                    "role": msg.role,
+                    "content": msg.content,
+                    "timestamp": msg.timestamp
+                } for msg in self.messages
+            ],
+            "date": self.date,
+            "db_path": self.db_path,
+            "workspace_id": self.workspace_id
+        }
+
+
+@dataclass
+class ChatSessionList:
+    """A list of chat sessions with metadata."""
+    sessions: List[ChatSession] = field(default_factory=list)
+    total_count: int = 0
+
+    def to_dict(self) -> List[Dict[str, Any]]:
+        """Convert the chat session list to a list of dictionaries."""
+        return [session.to_dict() for session in self.sessions]
diff --git a/repository.py b/repository.py
new file mode 100644
index 0000000..5ba1682
--- /dev/null
+++ b/repository.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python3
+"""
+Repository pattern implementation for Cursor View.
+This module provides a clean abstraction for database access with caching.
+"""
+
+import json
+import sqlite3
+import logging
+import pathlib
+import time
+from typing import Dict, List, Any, Optional, Tuple, Iterable, Set, Union
+from functools import lru_cache
+import threading
+from datetime import datetime, timedelta
+
+from models import ChatSession, Project, Message, ChatSessionList
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+class CacheStats:
+    """Simple class to track cache statistics."""
+    def __init__(self):
+        self.hits = 0
+        self.misses = 0
+        self.last_reset = time.time()
+
+    def record_hit(self):
+        self.hits += 1
+
+    def record_miss(self):
+        self.misses += 1
+
+    def hit_ratio(self) -> float:
+        total = self.hits + self.misses
+        return self.hits / total if total > 0 else 0
+
+    def reset(self):
+        self.hits = 0
+        self.misses = 0
+        self.last_reset = time.time()
+
+    def stats_dict(self) -> Dict[str, Any]:
+        return {
+            "hits": self.hits,
+            "misses": self.misses,
+            "hit_ratio": self.hit_ratio(),
+            "last_reset": self.last_reset
+        }
+
+
+class CursorRepository:
+    """Repository for accessing Cursor chat data."""
+
+    def __init__(self, cache_ttl: int = 300, cache_size: int = 128):
+        """
+        Initialize the repository.
+
+        Args:
+            cache_ttl: Time-to-live for cache entries in seconds
+            cache_size: Maximum number of items to keep in the cache
+        """
+        self.cache_ttl = cache_ttl
+        self.cache_size = cache_size
+        self.cache_stats = CacheStats()
+        self.last_refresh = time.time()
+        self._cache_lock = threading.RLock()
+
+        # Initialize the cache
+        self._init_cache()
+
+    def _init_cache(self):
+        """Initialize the cache with appropriate decorators."""
+        # Create a cache for the extract_chats method
+        self._cached_extract_chats = lru_cache(maxsize=self.cache_size)(self._extract_chats_impl)
+
+        # Create a cache for individual chat sessions
+        self._cached_get_chat = lru_cache(maxsize=self.cache_size)(self._get_chat_impl)
+
+    def invalidate_cache(self):
+        """Invalidate all caches."""
+        with self._cache_lock:
+            logger.info("Invalidating repository cache")
+            self._cached_extract_chats.cache_clear()
+            self._cached_get_chat.cache_clear()
+            self.cache_stats.reset()
+
+    def should_refresh_cache(self) -> bool:
+        """Check if the cache should be refreshed based on TTL."""
+        return (time.time() - self.last_refresh) > self.cache_ttl
+
+    def refresh_if_needed(self):
+        """Refresh the cache if needed based on TTL."""
+        if self.should_refresh_cache():
+            self.invalidate_cache()
+            self.last_refresh = time.time()
+
+    def get_all_chats(self) -> ChatSessionList:
+        """
+        Get all chat sessions with caching.
+
+        Returns:
+            ChatSessionList: A list of all chat sessions
+        """
+        self.refresh_if_needed()
+
+        start_time = time.time()
+
+        # Check if we have a cached result
+        cache_key = "all_chats"
+        with self._cache_lock:
+            try:
+                # Use the cached implementation
+                chats = self._cached_extract_chats(cache_key)
+                self.cache_stats.record_hit()
+                logger.debug(f"Cache hit for all_chats")
+            except Exception as e:
+                self.cache_stats.record_miss()
+                logger.debug(f"Cache miss for all_chats: {e}")
+                # If there's an error, try without cache
+                chats = self._extract_chats_impl(cache_key)
+
+        elapsed = time.time() - start_time
+        logger.info(f"Retrieved {len(chats.sessions)} chats in {elapsed:.3f}s (cache hit ratio: {self.cache_stats.hit_ratio():.2f})")
+
+        return chats
+
+    def get_chat(self, session_id: str) -> Optional[ChatSession]:
+        """
+        Get a specific chat session by ID with caching.
+
+        Args:
+            session_id: The ID of the chat session to retrieve
+
+        Returns:
+            ChatSession or None: The chat session if found, None otherwise
+        """
+        self.refresh_if_needed()
+
+        start_time = time.time()
+
+        with self._cache_lock:
+            try:
+                # Use the cached implementation
+                chat = self._cached_get_chat(session_id)
+                if chat:
+                    self.cache_stats.record_hit()
+                    logger.debug(f"Cache hit for chat {session_id}")
+                else:
+                    self.cache_stats.record_miss()
+                    logger.debug(f"Cache miss for chat {session_id} (not found)")
+            except Exception as e:
+                self.cache_stats.record_miss()
+                logger.debug(f"Cache miss for chat {session_id}: {e}")
+                # If there's an error, try without cache
+                chat = self._get_chat_impl(session_id)
+
+        elapsed = time.time() - start_time
+        logger.info(f"Retrieved chat {session_id} in {elapsed:.3f}s (found: {chat is not None})")
+
+        return chat
+
+    def _get_chat_impl(self, session_id: str) -> Optional[ChatSession]:
+        """
+        Implementation to get a specific chat session by ID.
+
+        Args:
+            session_id: The ID of the chat session to retrieve
+
+        Returns:
+            ChatSession or None: The chat session if found, None otherwise
+        """
+        all_chats = self._extract_chats_impl("all_chats")
+
+        for chat in all_chats.sessions:
+            if chat.session_id == session_id:
+                return chat
+
+        return None
+
+    def _extract_chats_impl(self, cache_key: str) -> ChatSessionList:
+        """
+        Implementation to extract all chat sessions from databases.
+        This is the core method that does the actual work of extracting chats.
+
+        Args:
+            cache_key: A key for caching (ignored in the implementation)
+
+        Returns:
+            ChatSessionList: A list of all chat sessions
+        """
+        # This would contain the actual implementation of extract_chats from server.py
+        # For now, we'll just return an empty list as a placeholder
+        # In the actual implementation, this would scan databases and extract chat data
+
+        # Placeholder for actual implementation
+        return ChatSessionList(sessions=[], total_count=0)
+
+    def get_cache_stats(self) -> Dict[str, Any]:
+        """Get cache statistics."""
+        return {
+            "stats": self.cache_stats.stats_dict(),
+            "config": {
+                "ttl": self.cache_ttl,
+                "size": self.cache_size
+            },
+            "last_refresh": self.last_refresh,
+            "current_time": time.time()
+        }
+
+
+# Singleton instance of the repository
+_repository_instance = None
+_repository_lock = threading.Lock()
+
+def get_repository(cache_ttl: int = 300, cache_size: int = 128) -> CursorRepository:
+    """
+    Get the singleton repository instance.
+
+    Args:
+        cache_ttl: Time-to-live for cache entries in seconds
+        cache_size: Maximum number of items to keep in the cache
+
+    Returns:
+        CursorRepository: The repository instance
+    """
+    global _repository_instance
+
+    with _repository_lock:
+        if _repository_instance is None:
+            _repository_instance = CursorRepository(cache_ttl=cache_ttl, cache_size=cache_size)
+
+    return _repository_instance
+
+
+# Helper functions for database access
+def j(cur: sqlite3.Cursor, table: str, key: str) -> Any:
+    """Extract a JSON value from a key-value table."""
+    cur.execute(f"SELECT value FROM {table} WHERE key=?", (key,))
+    row = cur.fetchone()
+    if row:
+        try:
+            return json.loads(row[0])
+        except Exception as e:
+            logger.debug(f"Failed to parse JSON for {key}: {e}")
+    return None
+
+
+def safe_connect(db_path: str) -> Optional[sqlite3.Connection]:
+    """
+    Safely connect to a SQLite database with timeout and error handling.
+
+    Args:
+        db_path: Path to the SQLite database
+
+    Returns:
+        sqlite3.Connection or None: Database connection if successful, None otherwise
+    """
+    try:
+        # Use URI mode for better compatibility and timeout to avoid locking issues
+        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True, timeout=5.0)
+        return conn
+    except sqlite3.Error as e:
+        logger.debug(f"Failed to connect to database {db_path}: {e}")
+        return None
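A rough usage sketch for the new repository module (not part of the diff; note that `_extract_chats_impl` is still a placeholder until it is wired to the extraction logic in server.py):

from repository import get_repository

# The first call creates the singleton and fixes the cache configuration;
# later calls return the same instance and ignore their arguments.
repo = get_repository(cache_ttl=60, cache_size=32)

# The first lookup populates the lru_cache; repeat calls within the TTL hit it.
chats = repo.get_all_chats()
print(len(chats.sessions), repo.get_cache_stats()["stats"])

# Drop cached results explicitly, e.g. after new Cursor activity on disk.
repo.invalidate_cache()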
""" import json @@ -12,12 +13,20 @@ import sqlite3 import argparse import pathlib +import time +import threading from collections import defaultdict -from typing import Dict, Any, Iterable +from typing import Dict, Any, Iterable, List, Optional, Union, Tuple from pathlib import Path +from functools import wraps + from flask import Flask, Response, jsonify, send_from_directory, request from flask_cors import CORS +# Import our models and repository +from models import ChatSession, Project, Message, ChatSessionList +from repository import get_repository, CursorRepository + # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') @@ -26,6 +35,18 @@ app = Flask(__name__, static_folder='frontend/build') CORS(app) +# Performance monitoring decorator +def timeit(func): + """Decorator to measure and log function execution time.""" + @wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + result = func(*args, **kwargs) + elapsed = time.time() - start_time + logger.info(f"{func.__name__} executed in {elapsed:.3f}s") + return result + return wrapper + ################################################################################ # Cursor storage roots ################################################################################ @@ -791,18 +812,23 @@ def format_chat_for_frontend(chat): } @app.route('/api/chats', methods=['GET']) +@timeit def get_chats(): - """Get all chat sessions.""" + """Get all chat sessions with caching.""" try: - logger.info(f"Received request for chats from {request.remote_addr}") - chats = extract_chats() - logger.info(f"Retrieved {len(chats)} chats") + logger.info(f"Received request for all chats from {request.remote_addr}") + + # Get repository instance + repo = get_repository() + + # Get all chats from the repository (with caching) + chat_list = repo.get_all_chats() # Format each chat for the frontend formatted_chats = [] - for chat in chats: + for chat in chat_list.sessions: try: - formatted_chat = format_chat_for_frontend(chat) + formatted_chat = format_chat_for_frontend(chat.to_dict()) formatted_chats.append(formatted_chat) except Exception as e: logger.error(f"Error formatting individual chat: {e}") @@ -816,18 +842,21 @@ def get_chats(): return jsonify({"error": str(e)}), 500 @app.route('/api/chat/', methods=['GET']) +@timeit def get_chat(session_id): - """Get a specific chat session by ID.""" + """Get a specific chat session by ID with caching.""" try: logger.info(f"Received request for chat {session_id} from {request.remote_addr}") - chats = extract_chats() - for chat in chats: - # Check for a matching composerId safely - if 'session' in chat and chat['session'] and isinstance(chat['session'], dict): - if chat['session'].get('composerId') == session_id: - formatted_chat = format_chat_for_frontend(chat) - return jsonify(formatted_chat) + # Get repository instance + repo = get_repository() + + # Get specific chat from the repository (with caching) + chat = repo.get_chat(session_id) + + if chat: + formatted_chat = format_chat_for_frontend(chat.to_dict()) + return jsonify(formatted_chat) logger.warning(f"Chat with ID {session_id} not found") return jsonify({"error": "Chat not found"}), 404 @@ -836,41 +865,44 @@ def get_chat(session_id): return jsonify({"error": str(e)}), 500 @app.route('/api/chat//export', methods=['GET']) +@timeit def export_chat(session_id): """Export a specific chat session as standalone HTML or JSON.""" try: logger.info(f"Received request to export chat 
{session_id} from {request.remote_addr}") export_format = request.args.get('format', 'html').lower() - chats = extract_chats() - for chat in chats: - # Check for a matching composerId safely - if 'session' in chat and chat['session'] and isinstance(chat['session'], dict): - if chat['session'].get('composerId') == session_id: - formatted_chat = format_chat_for_frontend(chat) - - if export_format == 'json': - # Export as JSON - return Response( - json.dumps(formatted_chat, indent=2), - mimetype="application/json; charset=utf-8", - headers={ - "Content-Disposition": f'attachment; filename="cursor-chat-{session_id[:8]}.json"', - "Cache-Control": "no-store", - }, - ) - else: - # Default to HTML export - html_content = generate_standalone_html(formatted_chat) - return Response( - html_content, - mimetype="text/html; charset=utf-8", - headers={ - "Content-Disposition": f'attachment; filename="cursor-chat-{session_id[:8]}.html"', - "Content-Length": str(len(html_content)), - "Cache-Control": "no-store", - }, - ) + # Get repository instance + repo = get_repository() + + # Get specific chat from the repository (with caching) + chat = repo.get_chat(session_id) + + if chat: + formatted_chat = format_chat_for_frontend(chat.to_dict()) + + if export_format == 'json': + # Export as JSON + return Response( + json.dumps(formatted_chat, indent=2), + mimetype="application/json; charset=utf-8", + headers={ + "Content-Disposition": f'attachment; filename="cursor-chat-{session_id[:8]}.json"', + "Cache-Control": "no-store", + }, + ) + else: + # Default to HTML export + html_content = generate_standalone_html(formatted_chat) + return Response( + html_content, + mimetype="text/html; charset=utf-8", + headers={ + "Content-Disposition": f'attachment; filename="cursor-chat-{session_id[:8]}.html"', + "Content-Length": str(len(html_content)), + "Cache-Control": "no-store", + }, + ) logger.warning(f"Chat with ID {session_id} not found for export") return jsonify({"error": "Chat not found"}), 404 @@ -1017,11 +1049,41 @@ def serve_react(path): return send_from_directory(app.static_folder, path) return send_from_directory(app.static_folder, 'index.html') +# Add new endpoints for cache management and metrics +@app.route('/api/cache/stats', methods=['GET']) +def get_cache_stats(): + """Get cache statistics.""" + try: + repo = get_repository() + stats = repo.get_cache_stats() + return jsonify(stats) + except Exception as e: + logger.error(f"Error getting cache stats: {e}") + return jsonify({"error": str(e)}), 500 + +@app.route('/api/cache/invalidate', methods=['POST']) +def invalidate_cache(): + """Invalidate the cache.""" + try: + repo = get_repository() + repo.invalidate_cache() + return jsonify({"status": "Cache invalidated"}) + except Exception as e: + logger.error(f"Error invalidating cache: {e}") + return jsonify({"error": str(e)}), 500 + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Run the Cursor Chat View server') parser.add_argument('--port', type=int, default=5000, help='Port to run the server on') parser.add_argument('--debug', action='store_true', help='Run in debug mode') + parser.add_argument('--cache-ttl', type=int, default=300, help='Cache time-to-live in seconds') + parser.add_argument('--cache-size', type=int, default=128, help='Maximum number of items in the cache') args = parser.parse_args() - logger.info(f"Starting server on port {args.port}") - app.run(host='127.0.0.1', port=args.port, debug=args.debug) \ No newline at end of file + # Initialize the repository with the 
specified cache parameters + repo = get_repository(cache_ttl=args.cache_ttl, cache_size=args.cache_size) + + # Log cache configuration + logger.info(f"Cache configured with TTL={args.cache_ttl}s, size={args.cache_size} items") + + app.run(host='0.0.0.0', port=args.port, debug=args.debug) \ No newline at end of file
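The new cache endpoints and CLI flags can be exercised against a running server. A minimal smoke-test sketch using only the standard library (assumes the default port 5000 on the local machine):

import json
import urllib.request

BASE = "http://127.0.0.1:5000"

# Warm the cache by listing chats, then inspect the hit/miss counters.
with urllib.request.urlopen(f"{BASE}/api/chats") as resp:
    chats = json.load(resp)

with urllib.request.urlopen(f"{BASE}/api/cache/stats") as resp:
    print(json.load(resp)["stats"])

# Drop the cache so the next /api/chats request re-reads the databases.
req = urllib.request.Request(f"{BASE}/api/cache/invalidate", method="POST")
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # {"status": "Cache invalidated"}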