diff --git a/packages/backend/app/__init__.py b/packages/backend/app/__init__.py
index cdf76b45..bac97df1 100644
--- a/packages/backend/app/__init__.py
+++ b/packages/backend/app/__init__.py
@@ -2,6 +2,7 @@
 from .config import Settings
 from .extensions import db, jwt
 from .routes import register_routes
+from .db.index_report import register_index_cli
 from .observability import (
     Observability,
     configure_logging,
@@ -51,6 +52,7 @@ def create_app(settings: Settings | None = None) -> Flask:
     # Redis (already global)
     # Blueprint routes
     register_routes(app)
+    register_index_cli(app)
 
     # Backward-compatible schema patch for existing databases.
     with app.app_context():
diff --git a/packages/backend/app/db/index_report.py b/packages/backend/app/db/index_report.py
new file mode 100644
index 00000000..ba2bf29d
--- /dev/null
+++ b/packages/backend/app/db/index_report.py
@@ -0,0 +1,91 @@
+"""
+Database index report utility (Issue #128).
+
+Run with: flask index-report
+
+Reports: current indexes and missing recommended indexes.
+Helps track index health in production.
+"""
+
+from __future__ import annotations
+
+import click
+from flask import Flask
+from flask.cli import with_appcontext
+
+from ..extensions import db
+
+# Recommended indexes — verified against the live database by the
+# `flask index-report` CLI command and by the test suite to catch
+# regressions. (There is no automatic startup check.)
+REQUIRED_INDEXES = [
+    "idx_expenses_user_spent_at",
+    "idx_expenses_user_category",
+    "idx_expenses_user_type_date",
+    "idx_expenses_user_month",
+    "idx_bills_user_due",
+    "idx_bills_user_active_due",
+    "idx_reminders_due",
+    "idx_reminders_pending_dispatch",
+    "idx_recurring_expenses_user_start",
+    "idx_recurring_expenses_active",
+    "idx_categories_user_name",
+    "idx_audit_logs_user_created",
+    "idx_audit_logs_action_created",
+]
+
+
+def get_existing_indexes(engine) -> list[str]:
+    """Return all index names in the 'public' schema (PostgreSQL only).
+
+    Note: this lists every public-schema index, not only FinMind-created
+    ones. On non-PostgreSQL engines (e.g. SQLite in tests) returns [].
+    """
+    if engine.dialect.name != "postgresql":
+        return []
+    with engine.connect() as conn:
+        result = conn.execute(
+            db.text(
+                """
+                SELECT indexname FROM pg_indexes
+                WHERE schemaname = 'public'
+                ORDER BY indexname
+                """
+            )
+        )
+        return [row[0] for row in result]
+
+
+def check_missing_indexes(engine) -> list[str]:
+    """Return list of recommended indexes not yet present.
+
+    On non-PostgreSQL engines get_existing_indexes() returns [], so this
+    returns the full REQUIRED_INDEXES list.
+    """
+    existing = set(get_existing_indexes(engine))
+    return [idx for idx in REQUIRED_INDEXES if idx not in existing]
+
+
+def register_index_cli(app: Flask) -> None:
+    """Register the flask index-report CLI command."""
+
+    @app.cli.command("index-report")
+    @with_appcontext
+    def index_report():
+        """Print a report of database indexes and missing recommendations."""
+        engine = db.engine
+        if engine.dialect.name != "postgresql":
+            click.echo("Index report only supported on PostgreSQL.")
+            return
+
+        existing = get_existing_indexes(engine)
+        missing = check_missing_indexes(engine)
+
+        click.echo(f"\n{'='*60}")
+        click.echo("FinMind Database Index Report")
+        click.echo(f"{'='*60}")
+        click.echo(f"\nTotal indexes: {len(existing)}")
+        for idx in sorted(existing):
+            status = "✅" if idx in REQUIRED_INDEXES else " "
+            click.echo(f" {status} {idx}")
+
+        if missing:
+            click.echo(f"\n⚠️ Missing recommended indexes ({len(missing)}):")
+            for idx in missing:
+                click.echo(f" ❌ {idx}")
+            click.echo(
+                "\nRun migration 007_db_indexing.sql to add missing indexes."
+            )
+        else:
+            click.echo("\n✅ All recommended indexes are present.")
+        click.echo(f"{'='*60}\n")
diff --git a/packages/backend/app/db/migrations/007_db_indexing.sql b/packages/backend/app/db/migrations/007_db_indexing.sql
new file mode 100644
index 00000000..a2e2806e
--- /dev/null
+++ b/packages/backend/app/db/migrations/007_db_indexing.sql
@@ -0,0 +1,122 @@
+-- Migration 007: Database indexing optimization for financial queries
+-- Issue #128
+-- Apply with: psql $DATABASE_URL -f migrations/007_db_indexing.sql
+--
+-- Analysis: most expensive query patterns in FinMind
+-- 1. GET /expenses?from=&to=&category_id= → composite filter on user+date+category
+-- 2. GET /insights (monthly aggregates) → group by user+month on expenses
+-- 3. GET /bills (active only, ordered) → filter active, sort by due date
+-- 4. GET /reminders/run (pending dispatch) → filter sent=false, send_at <= now
+-- 5. GET /categories (user lookup) → filter by user_id
+-- 6. Recurring expense generation → filter by user+active+cadence
+-- 7. Audit log queries → filter by user_id + action
+
+
+-- ── expenses ────────────────────────────────────────────────────────────────
+
+-- Basic user+date index — covers simple date-range queries that carry no
+-- type or category filter (e.g. GET /expenses?from=&to=).
+CREATE INDEX IF NOT EXISTS idx_expenses_user_spent_at
+    ON expenses (user_id, spent_at DESC);
+
+-- Category-filtered expense queries (GET /expenses?category_id=X)
+CREATE INDEX IF NOT EXISTS idx_expenses_user_category
+    ON expenses (user_id, category_id, spent_at DESC)
+    WHERE category_id IS NOT NULL;
+
+-- Income vs Expense type split (used by insights/dashboard)
+CREATE INDEX IF NOT EXISTS idx_expenses_user_type_date
+    ON expenses (user_id, expense_type, spent_at DESC);
+
+-- Monthly aggregation (used by insights: GROUP BY year/month)
+-- NOTE: DATE_TRUNC is a PostgreSQL-specific function. This index will be
+-- created successfully only on PostgreSQL and will fail on SQLite or MySQL.
+-- On SQLite the test suite skips index-existence checks (see index_report.py).
+-- NOTE(review): if spent_at is `timestamptz`, DATE_TRUNC('month', ...) is not
+-- IMMUTABLE and this CREATE INDEX will fail — confirm spent_at is a plain
+-- `timestamp` (without time zone) before relying on this index.
+CREATE INDEX IF NOT EXISTS idx_expenses_user_month
+    ON expenses (user_id, DATE_TRUNC('month', spent_at));
+
+-- Recurring source lookups (cascades, dedup checks)
+CREATE INDEX IF NOT EXISTS idx_expenses_source_recurring
+    ON expenses (source_recurring_id)
+    WHERE source_recurring_id IS NOT NULL;
+
+-- Amount range queries (future: analytics, budgeting)
+CREATE INDEX IF NOT EXISTS idx_expenses_user_amount
+    ON expenses (user_id, amount);
+
+
+-- ── bills ───────────────────────────────────────────────────────────────────
+
+-- Full (non-partial) user+due-date index — covers queries that do not
+-- filter on active, e.g. admin views and historical reporting.
+CREATE INDEX IF NOT EXISTS idx_bills_user_due
+    ON bills (user_id, next_due_date);
+
+-- Active bills only (most queries filter active=TRUE)
+CREATE INDEX IF NOT EXISTS idx_bills_user_active_due
+    ON bills (user_id, next_due_date)
+    WHERE active = TRUE;
+
+-- Autopay-enabled bills (background job filter)
+CREATE INDEX IF NOT EXISTS idx_bills_autopay
+    ON bills (user_id, next_due_date)
+    WHERE autopay_enabled = TRUE AND active = TRUE;
+
+
+-- ── reminders ───────────────────────────────────────────────────────────────
+
+-- Full user+send_at index — supports per-user reminder listing and
+-- queries that don't filter on sent status (e.g. history views).
+CREATE INDEX IF NOT EXISTS idx_reminders_due
+    ON reminders (user_id, send_at);
+
+-- Pending reminders — the hot path for reminder dispatch job.
+-- Partial index: unsent rows only (the predicate is just sent = FALSE;
+-- permanently-failed reminders are NOT excluded by this index).
+CREATE INDEX IF NOT EXISTS idx_reminders_pending_dispatch
+    ON reminders (send_at, user_id)
+    WHERE sent = FALSE;
+
+-- Bill-reminder relationship lookups
+CREATE INDEX IF NOT EXISTS idx_reminders_bill
+    ON reminders (bill_id)
+    WHERE bill_id IS NOT NULL;
+
+
+-- ── recurring_expenses ──────────────────────────────────────────────────────
+
+-- Full user+start_date index — supports historical queries and admin views
+-- that list all recurring expenses regardless of active status.
+CREATE INDEX IF NOT EXISTS idx_recurring_expenses_user_start
+    ON recurring_expenses (user_id, start_date);
+
+-- Active recurring expenses for generation jobs
+CREATE INDEX IF NOT EXISTS idx_recurring_expenses_active
+    ON recurring_expenses (user_id, cadence, start_date)
+    WHERE active = TRUE;
+
+
+-- ── categories ──────────────────────────────────────────────────────────────
+
+-- User category listing (typically ordered by name)
+CREATE INDEX IF NOT EXISTS idx_categories_user_name
+    ON categories (user_id, name);
+
+
+-- ── audit_logs ──────────────────────────────────────────────────────────────
+
+-- User audit log queries (GDPR export, per-user audit)
+CREATE INDEX IF NOT EXISTS idx_audit_logs_user_created
+    ON audit_logs (user_id, created_at DESC)
+    WHERE user_id IS NOT NULL;
+
+-- Action-based queries (monitoring, alerting)
+CREATE INDEX IF NOT EXISTS idx_audit_logs_action_created
+    ON audit_logs (action, created_at DESC);
+
+
+-- ── user_subscriptions ──────────────────────────────────────────────────────
+
+-- Active subscription lookup (feature gating)
+CREATE INDEX IF NOT EXISTS idx_user_subscriptions_active
+    ON user_subscriptions (user_id, active)
+    WHERE active = TRUE;
diff --git a/packages/backend/app/routes/__init__.py b/packages/backend/app/routes/__init__.py
index f13b0f89..0f4218ed 100644
--- a/packages/backend/app/routes/__init__.py
+++ b/packages/backend/app/routes/__init__.py
@@ -7,6 +7,7 @@
 from .categories import bp as categories_bp
 from .docs import bp as docs_bp
 from .dashboard import bp as dashboard_bp
+from .admin import bp as admin_bp
 
 
 def register_routes(app: Flask):
@@ -18,3 +19,4 @@ def register_routes(app: Flask):
     app.register_blueprint(categories_bp, url_prefix="/categories")
     app.register_blueprint(docs_bp, url_prefix="/docs")
     app.register_blueprint(dashboard_bp, url_prefix="/dashboard")
+    app.register_blueprint(admin_bp, url_prefix="/admin")
diff --git a/packages/backend/app/routes/admin.py b/packages/backend/app/routes/admin.py
new file mode 100644
index 00000000..491d2df8
--- /dev/null
+++ b/packages/backend/app/routes/admin.py
@@ -0,0 +1,61 @@
+"""
+Admin endpoints (Issue #128).
+
+GET /admin/db/indexes — return the index report as JSON (same data as
+    the `flask index-report` CLI command).
+"""
+
+from flask import Blueprint, jsonify
+from flask_jwt_extended import get_jwt_identity, jwt_required
+
+from ..db.index_report import REQUIRED_INDEXES, check_missing_indexes, get_existing_indexes
+from ..extensions import db
+from ..models import User
+
+bp = Blueprint("admin", __name__)
+
+
+def _require_admin():
+    """Return (user, error_response) — error_response is None if user is ADMIN."""
+    uid = int(get_jwt_identity())
+    user = db.session.get(User, uid)
+    if not user or user.role != "ADMIN":
+        return None, (jsonify(error="admin access required"), 403)
+    return user, None
+
+
+@bp.get("/db/indexes")
+@jwt_required()
+def db_index_report():
+    """
+    Return the database index health report as JSON.
+
+    Requires ADMIN role. On non-PostgreSQL engines (e.g. SQLite in tests)
+    returns an empty existing list with all REQUIRED_INDEXES flagged as missing.
+
+    Response shape:
+    {
+        "existing": ["idx_name", ...],
+        "required": ["idx_name", ...],
+        "missing": ["idx_name", ...],
+        "extra": ["idx_name", ...],
+        "healthy": true|false
+    }
+    """
+    _, err = _require_admin()
+    if err:
+        return err
+
+    engine = db.engine
+    existing = get_existing_indexes(engine)
+    missing = check_missing_indexes(engine)
+    required_set = set(REQUIRED_INDEXES)
+    existing_set = set(existing)
+
+    return jsonify({
+        "existing": sorted(existing),
+        "required": sorted(REQUIRED_INDEXES),
+        "missing": sorted(missing),
+        "extra": sorted(existing_set - required_set),
+        "healthy": len(missing) == 0,
+    })
diff --git a/packages/backend/tests/test_db_indexes.py b/packages/backend/tests/test_db_indexes.py
new file mode 100644
index 00000000..46384dba
--- /dev/null
+++ b/packages/backend/tests/test_db_indexes.py
@@ -0,0 +1,215 @@
+"""
+Tests for database indexing optimization (Issue #128).
+
+For SQLite test DBs we verify the index_report utility logic.
+For PostgreSQL we verify the actual indexes are created.
+Integration tests confirm that indexed query patterns return correct results.
+"""
+
+from __future__ import annotations
+
+import pytest
+from app.db.index_report import REQUIRED_INDEXES, check_missing_indexes
+from app.extensions import db
+
+
+class TestIndexReport:
+    def test_required_indexes_list_nonempty(self):
+        assert len(REQUIRED_INDEXES) > 0
+
+    def test_required_indexes_are_strings(self):
+        for idx in REQUIRED_INDEXES:
+            assert isinstance(idx, str)
+            assert idx.startswith("idx_")
+
+    def test_check_missing_returns_list(self, app_fixture):
+        with app_fixture.app_context():
+            # On SQLite no PostgreSQL indexes are visible, so this returns
+            # the full REQUIRED_INDEXES list (everything "missing").
+            # On PostgreSQL it checks the real indexes.
+            missing = check_missing_indexes(db.engine)
+            assert isinstance(missing, list)
+
+    def test_critical_indexes_documented(self):
+        """Verify all critical query paths have a corresponding index entry."""
+        idx_set = set(REQUIRED_INDEXES)
+
+        # Expenses — the hottest table
+        assert "idx_expenses_user_spent_at" in idx_set  # basic date-range
+        assert "idx_expenses_user_type_date" in idx_set  # income/expense split
+        assert "idx_expenses_user_category" in idx_set  # category filter
+        assert "idx_expenses_user_month" in idx_set  # monthly aggregation
+
+        # Bills — both the partial (active=TRUE) and full index
+        assert "idx_bills_user_due" in idx_set  # full; historical/admin
+        assert "idx_bills_user_active_due" in idx_set  # partial; most queries
+
+        # Reminders — full and partial
+        assert "idx_reminders_due" in idx_set  # full; per-user listing
+        assert "idx_reminders_pending_dispatch" in idx_set  # partial; dispatch job
+
+        # Recurring expenses
+        assert "idx_recurring_expenses_user_start" in idx_set  # full; history
+        assert "idx_recurring_expenses_active" in idx_set  # partial; generation
+
+        # Audit
+        assert "idx_audit_logs_user_created" in idx_set
+        assert "idx_audit_logs_action_created" in idx_set
+
+    def test_required_indexes_match_migration(self):
+        """REQUIRED_INDEXES must not contain index names absent from the migration.
+
+        This is a static regression guard: if the migration and the Python list
+        drift apart, ``flask index-report`` will always show false positives.
+        Any name added to REQUIRED_INDEXES must have a corresponding
+        CREATE INDEX … statement in 007_db_indexing.sql.
+        """
+        import os
+        import re
+
+        migration_path = os.path.join(
+            os.path.dirname(__file__),
+            "../app/db/migrations/007_db_indexing.sql",
+        )
+        with open(migration_path) as f:
+            sql = f.read()
+
+        # Extract every index name defined by a CREATE INDEX statement
+        defined_in_migration = set(re.findall(r"CREATE INDEX IF NOT EXISTS (\w+)", sql))
+
+        not_in_migration = [
+            idx for idx in REQUIRED_INDEXES if idx not in defined_in_migration
+        ]
+        assert not_in_migration == [], (
+            f"REQUIRED_INDEXES entries missing from migration: {not_in_migration}"
+        )
+
+    def test_date_trunc_index_is_postgres_only(self):
+        """idx_expenses_user_month uses DATE_TRUNC — must be documented as PG-only."""
+        import os
+
+        migration_path = os.path.join(
+            os.path.dirname(__file__),
+            "../app/db/migrations/007_db_indexing.sql",
+        )
+        with open(migration_path) as f:
+            sql = f.read()
+
+        # Find the block containing the DATE_TRUNC index and assert there is a
+        # PostgreSQL warning comment in the preceding lines.
+        idx = sql.find("idx_expenses_user_month")
+        assert idx != -1, "idx_expenses_user_month not found in migration"
+
+        # Look at the 400 characters before the CREATE INDEX statement
+        context = sql[max(0, idx - 400):idx]
+        assert "postgresql" in context.lower() or "postgres" in context.lower(), (
+            "Migration must document that idx_expenses_user_month requires PostgreSQL "
+            "(DATE_TRUNC is not supported on SQLite/MySQL)"
+        )
+
+
+class TestIndexedQueryPatterns:
+    """Verify that the query patterns benefiting from indexes work correctly."""
+
+    EMAIL = "idx@test.com"
+
+    def _auth(self, client):
+        client.post("/auth/register", json={"email": self.EMAIL, "password": "pass"})
+        r = client.post("/auth/login", json={"email": self.EMAIL, "password": "pass"})
+        return {"Authorization": f"Bearer {r.get_json()['access_token']}"}
+
+    def test_expense_filter_by_date_range(self, client, app_fixture):
+        h = self._auth(client)
+        client.post("/expenses", json={"amount": 100, "description": "A", "date": "2026-01-01"}, headers=h)
+        client.post("/expenses", json={"amount": 200, "description": "B", "date": "2026-03-01"}, headers=h)
+        r = client.get("/expenses?from=2026-01-01&to=2026-02-28", headers=h)
+        assert r.status_code == 200
+        items = r.get_json()
+        assert all(i["date"] <= "2026-02-28" for i in items)
+
+    def test_expense_filter_by_category(self, client, app_fixture):
+        h = self._auth(client)
+        # Create category
+        cat_r = client.post("/categories", json={"name": "Food"}, headers=h)
+        if cat_r.status_code in (200, 201):
+            cat_id = cat_r.get_json().get("id")
+            client.post("/expenses", json={
+                "amount": 50, "description": "Lunch",
+                "date": "2026-01-15", "category_id": cat_id,
+            }, headers=h)
+            r = client.get(f"/expenses?category_id={cat_id}", headers=h)
+            assert r.status_code == 200
+
+    def test_bills_list_active_only(self, client, app_fixture):
+        h = self._auth(client)
+        r = client.get("/bills", headers=h)
+        assert r.status_code == 200
+        # All returned bills should be active
+        for b in r.get_json():
+            assert b.get("active", True) is True
+
+    def test_reminders_pending_query(self, client, app_fixture):
+        h = self._auth(client)
+        r = client.post("/reminders/run", headers=h)
+        assert r.status_code == 200
+
+    def test_categories_user_list(self, client, app_fixture):
+        h = self._auth(client)
+        r = client.get("/categories", headers=h)
+        assert r.status_code == 200
+        assert isinstance(r.get_json(), list)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Admin HTTP endpoint — GET /admin/db/indexes
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestAdminDbIndexesEndpoint:
+    def _admin_auth(self, client, app_fixture):
+        email = "admin_idx@test.com"
+        client.post("/auth/register", json={"email": email, "password": "pass1234"})
+        # Elevate to ADMIN directly in DB
+        from app.models import User
+        from app.extensions import db
+        with app_fixture.app_context():
+            u = db.session.query(User).filter_by(email=email).first()
+            u.role = "ADMIN"
+            db.session.commit()
+        r = client.post("/auth/login", json={"email": email, "password": "pass1234"})
+        return {"Authorization": f"Bearer {r.get_json()['access_token']}"}
+
+    def _user_auth(self, client):
+        email = "plain_idx@test.com"
+        client.post("/auth/register", json={"email": email, "password": "pass1234"})
+        r = client.post("/auth/login", json={"email": email, "password": "pass1234"})
+        return {"Authorization": f"Bearer {r.get_json()['access_token']}"}
+
+    def test_requires_auth(self, client, app_fixture):
+        assert client.get("/admin/db/indexes").status_code == 401
+
+    def test_requires_admin_role(self, client, app_fixture):
+        h = self._user_auth(client)
+        r = client.get("/admin/db/indexes", headers=h)
+        assert r.status_code == 403
+
+    def test_admin_returns_200(self, client, app_fixture):
+        h = self._admin_auth(client, app_fixture)
+        r = client.get("/admin/db/indexes", headers=h)
+        assert r.status_code == 200
+
+    def test_response_structure(self, client, app_fixture):
+        h = self._admin_auth(client, app_fixture)
+        d = client.get("/admin/db/indexes", headers=h).get_json()
+        assert "existing" in d
+        assert "required" in d
+        assert "missing" in d
+        assert "extra" in d
+        assert "healthy" in d
+        assert isinstance(d["existing"], list)
+        assert isinstance(d["required"], list)
+        assert isinstance(d["missing"], list)
+        assert isinstance(d["healthy"], bool)
+
+    def test_required_list_matches_constant(self, client, app_fixture):
+        h = self._admin_auth(client, app_fixture)
+        d = client.get("/admin/db/indexes", headers=h).get_json()
+        from app.db.index_report import REQUIRED_INDEXES
+        assert set(d["required"]) == set(REQUIRED_INDEXES)