2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -84,7 +84,7 @@ jobs:
run: uv sync --dev

- name: Run pytest with coverage
- run: uv run pytest
+ run: uv run pytest --ignore=tests/benchmarks

- name: Upload coverage to Codecov
if: matrix.python-version == '3.12'
10 changes: 8 additions & 2 deletions .gitignore
@@ -52,5 +52,11 @@ AGENTS.md

tests/examples/*

- # Integration test files (contain credentials/connection strings)
- tests/llm/clients/oss/openai/async_integration.py
+ # Benchmarking results
+ tests/benchmarks/results/
+ results/
+ *.json
+ *.csv
+ !pyproject.toml
+ !package.json
+ !composer.json
16 changes: 8 additions & 8 deletions .pre-commit-config.yaml
@@ -16,17 +16,17 @@ repos:

- repo: local
hooks:
- #- id: ty
- #name: ty type checker
- #entry: uvx ty check --exclude 'tests/llm/clients/**/*.py'
- #language: system
- #types: [python]
- #pass_filenames: false
- #always_run: true
+ - id: ty
+ name: ty type checker
+ entry: uvx ty check
+ language: system
+ types: [python]
+ pass_filenames: false
+ always_run: true

- id: pytest
name: pytest
- entry: uv run pytest
+ entry: uv run pytest --ignore=tests/benchmarks
language: system
pass_filenames: false
always_run: true
15 changes: 14 additions & 1 deletion pyproject.toml
@@ -87,11 +87,14 @@ python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
"asyncio: marks tests as async (deselect with '-m \"not asyncio\"')",
"benchmark: marks tests as performance benchmarks",
]
asyncio_mode = "auto"
addopts = [
"-v",
"--strict-markers",
"-m",
"not benchmark",
"--cov=memori",
"--cov-report=term-missing",
"--cov-report=html",
@@ -116,7 +119,15 @@ exclude_lines = [
"if TYPE_CHECKING:",
]

- [tool.ty]
+ [tool.ty.src]
+ exclude = [
+ "tests/llm/clients/**/*.py",
+ "**/__pycache__/**",
+ ]
+
+ [tool.ty.environment]
+ python-version = "3.12"
+

[dependency-groups]
dev = [
@@ -139,8 +150,10 @@ dev = [
"pymysql>=1.1.2",
"pytest>=8.4.2",
"pytest-asyncio>=0.24.0",
"pytest-benchmark>=4.0.0",
"pytest-cov>=6.0.0",
"pytest-mock>=3.15.1",
"psutil>=5.9.0",
"requests>=2.32.5",
"ruff>=0.8.0",
"sqlalchemy>=2.0.44",
58 changes: 58 additions & 0 deletions tests/benchmarks/README.md
@@ -0,0 +1,58 @@
# AWS EC2 Benchmark Guide

This guide explains how to run Memori benchmarks on an EC2 instance in the same VPC as your AWS database (RDS Postgres or MySQL).

## Setup on EC2

1. **SSH into EC2**:
```bash
ssh ec2-user@your-ec2-ip
```

2. **Run Setup**:
Copy `tests/benchmarks/setup_ec2_benchmarks.sh` to the EC2 instance (or clone the repo there), then run it:
```bash
chmod +x tests/benchmarks/setup_ec2_benchmarks.sh
./tests/benchmarks/setup_ec2_benchmarks.sh
```

## Running Benchmarks

The `run_benchmarks_ec2.sh` script selects the database and benchmark subset through the environment variables below and generates a CSV report for each run automatically.

### Environment Variables

- `DB_TYPE`: `postgres` (default) or `mysql`
- `TEST_TYPE`: `all` (default), `end_to_end`, `db_retrieval`, `semantic_search`, `embedding`
- `BENCHMARK_POSTGRES_URL`: Connection string for Postgres
- `BENCHMARK_MYSQL_URL`: Connection string for MySQL

### Examples

#### Run all Postgres benchmarks
```bash
export BENCHMARK_POSTGRES_URL="CHANGEME"
DB_TYPE=postgres TEST_TYPE=all ./tests/benchmarks/run_benchmarks_ec2.sh
```

#### Run only End-to-End MySQL benchmarks
```bash
export BENCHMARK_MYSQL_URL="CHANGEME"
DB_TYPE=mysql TEST_TYPE=end_to_end ./tests/benchmarks/run_benchmarks_ec2.sh
```

## Results

All results are automatically saved to the `./results` directory with a timestamp to prevent overwriting:
- JSON output: `results_{db}_{type}_{timestamp}.json`
- **CSV Report**: `report_{db}_{type}_{timestamp}.csv`

To download the CSV reports to your local machine:
```bash
scp ec2-user@your-ec2-ip:~/Memori/results/report_*.csv ./local_results/
```

## Database Connection Requirements

Ensure the EC2 instance's security group allows outbound traffic to the database on port 5432 (Postgres) or 3306 (MySQL), and that the database's security group accepts inbound connections from the EC2 instance on the same port.
The database must be in the same VPC or accessible via VPC Peering/Transit Gateway.
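As a quick pre-flight check (not part of the scripts above), a reachability test from the EC2 instance can confirm the security group rules before a full benchmark run; the endpoint below is a placeholder, and `nc` may need to be installed first:

```bash
# Placeholder endpoint: substitute your RDS hostname; use 5432 for Postgres, 3306 for MySQL
nc -zv your-db-endpoint.rds.amazonaws.com 5432
```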
175 changes: 175 additions & 0 deletions tests/benchmarks/conftest.py
@@ -0,0 +1,175 @@
"""Pytest fixtures for performance benchmarks."""

import os

import pytest
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

from memori import Memori
from memori.llm._embeddings import embed_texts
from tests.benchmarks.fixtures.sample_data import (
generate_facts_with_size,
generate_sample_queries,
)


@pytest.fixture
def postgres_db_connection():
"""Create a PostgreSQL database connection factory for benchmarking (via AWS/Docker)."""
postgres_uri = os.environ.get(
"BENCHMARK_POSTGRES_URL",
# Matches docker-compose.yml default DB name
"postgresql://memori:memori@localhost:5432/memori_test",
)

# Support SSL root certificate via environment variable (for AWS RDS)
connect_args = {}
sslrootcert = os.environ.get("BENCHMARK_POSTGRES_SSLROOTCERT")
if sslrootcert:
connect_args["sslrootcert"] = sslrootcert
# Ensure sslmode is set if using SSL cert
if "sslmode" not in postgres_uri:
# Add sslmode=require if not already in URI
separator = "&" if "?" in postgres_uri else "?"
postgres_uri = f"{postgres_uri}{separator}sslmode=require"

engine = create_engine(
postgres_uri,
pool_pre_ping=True,
pool_recycle=300,
connect_args=connect_args,  # pass {} rather than None when no SSL options are set
)

try:
with engine.connect() as conn:
conn.execute(text("SELECT 1"))
except Exception as e:
pytest.skip(
f"PostgreSQL not available at {postgres_uri}: {e}. "
"Set BENCHMARK_POSTGRES_URL to a database that exists."
)

Session = sessionmaker(autocommit=False, autoflush=False, bind=engine)

yield Session
engine.dispose()


@pytest.fixture
def mysql_db_connection():
"""Create a MySQL database connection factory for benchmarking (via AWS/Docker)."""
mysql_uri = os.environ.get(
"BENCHMARK_MYSQL_URL",
"mysql+pymysql://memori:memori@localhost:3306/memori_test",
)

engine = create_engine(
mysql_uri,
pool_pre_ping=True,
pool_recycle=300,
)

try:
with engine.connect() as conn:
conn.execute(text("SELECT 1"))
except Exception as e:
pytest.skip(f"MySQL not available at {mysql_uri}: {e}")

Session = sessionmaker(autocommit=False, autoflush=False, bind=engine)

yield Session
engine.dispose()


@pytest.fixture(
params=["postgres", "mysql"],
ids=["postgres", "mysql"],
)
def db_connection(request):
"""Parameterized fixture for realistic database types (no SQLite)."""
db_type = request.param

if db_type == "postgres":
return request.getfixturevalue("postgres_db_connection")
elif db_type == "mysql":
return request.getfixturevalue("mysql_db_connection")

pytest.skip(f"Unsupported benchmark database type: {db_type}")


@pytest.fixture
def memori_instance(db_connection, request):
"""Create a Memori instance with the specified database for benchmarking."""
mem = Memori(conn=db_connection)
mem.config.storage.build()

db_type_param = None
for marker in request.node.iter_markers("parametrize"):
if "db_connection" in marker.args[0]:
db_type_param = marker.args[1][0] if marker.args[1] else None
break

# Try to infer from connection
if not db_type_param:
try:
# A sessionmaker keeps its bind in .kw; use the bound engine's dialect name when present.
bind = getattr(db_connection, "kw", {}).get("bind", None)
if bind is not None:
db_type_param = bind.dialect.name
else:
db_type_param = "unknown"
except Exception:
db_type_param = "unknown"

mem._benchmark_db_type = db_type_param # ty: ignore[unresolved-attribute]
return mem


@pytest.fixture
def sample_queries():
"""Provide sample queries of varying lengths."""
return generate_sample_queries()


@pytest.fixture
def fact_content_size():
"""Fixture for fact content size.

Note: Embeddings are always 768 dimensions (3072 bytes binary) regardless of text size.
"""
return "small"


@pytest.fixture(
params=[5, 50, 100, 300, 600, 1000],
ids=lambda x: f"n{x}",
)
def entity_with_n_facts(memori_instance, fact_content_size, request):
"""Create an entity with N facts for benchmarking database retrieval."""
fact_count = request.param
entity_id = f"benchmark-entity-{fact_count}-{fact_content_size}"
memori_instance.attribution(entity_id=entity_id, process_id="benchmark-process")

facts = generate_facts_with_size(fact_count, fact_content_size)
fact_embeddings = embed_texts(facts)

entity_db_id = memori_instance.config.storage.driver.entity.create(entity_id)
memori_instance.config.storage.driver.entity_fact.create(
entity_db_id, facts, fact_embeddings
)

db_type = getattr(memori_instance, "_benchmark_db_type", "unknown")

return {
"entity_id": entity_id,
"entity_db_id": entity_db_id,
"fact_count": fact_count,
"content_size": fact_content_size,
"db_type": db_type,
"facts": facts,
}
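The fixtures above fall back to `localhost` connection strings that, per the in-code comment, match `docker-compose.yml`, so a local run might look like the sketch below (assuming the repo's compose file actually provides the `memori_test` Postgres and MySQL services on their default ports):

```bash
# Start the local databases (assumes the repo's docker-compose.yml defines them)
docker compose up -d
# Run the benchmark suite against the default localhost connection strings
uv run pytest tests/benchmarks -m benchmark
```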