|
| 1 | +import os |
| 2 | +from datetime import datetime, timezone |
| 3 | +from pathlib import Path |
| 4 | +from typing import Any, Dict, List, Optional |
| 5 | + |
| 6 | +from loguru import logger |
| 7 | +from sqlalchemy import inspect, text |
| 8 | + |
| 9 | +from services.database import ( |
| 10 | + WORKSPACE_DIR, |
| 11 | + get_all_user_ids, |
| 12 | + get_engine_for_user, |
| 13 | + get_session_for_user, |
| 14 | + get_user_db_path, |
| 15 | + init_database, |
| 16 | + default_engine, |
| 17 | +) |
| 18 | + |
| 19 | +_REQUIRED_SCHEMA: Dict[str, List[str]] = { |
| 20 | + "onboarding_sessions": ["id", "user_id", "updated_at"], |
| 21 | + "daily_workflow_plans": ["id", "user_id", "generation_mode", "fallback_used"], |
| 22 | +} |
| 23 | + |
| 24 | +_STARTUP_STATUS: Dict[str, Any] = { |
| 25 | + "status": "unknown", |
| 26 | + "mode": "multi_tenant" if default_engine is None else "single_tenant", |
| 27 | + "checks": [], |
| 28 | + "errors": [], |
| 29 | + "warnings": [], |
| 30 | + "checked_at": None, |
| 31 | +} |
| 32 | + |
| 33 | + |
| 34 | +def _env_true(name: str, default: bool = False) -> bool: |
| 35 | + raw = os.getenv(name) |
| 36 | + if raw is None: |
| 37 | + return default |
| 38 | + return raw.strip().lower() in {"1", "true", "yes", "y", "on"} |
| 39 | + |
| 40 | + |
| 41 | +def should_fail_fast() -> bool: |
| 42 | + if os.getenv("ALWRITY_FAIL_FAST_STARTUP") is not None: |
| 43 | + return _env_true("ALWRITY_FAIL_FAST_STARTUP", default=False) |
| 44 | + app_env = os.getenv("APP_ENV", os.getenv("ENV", "")).strip().lower() |
| 45 | + return app_env in {"prod", "production"} |
| 46 | + |
| 47 | + |
| 48 | +def _record_check(checks: List[Dict[str, Any]], name: str, ok: bool, detail: str) -> None: |
| 49 | + checks.append({"name": name, "ok": ok, "detail": detail}) |
| 50 | + |
| 51 | + |
| 52 | +def _check_workspace_root(checks: List[Dict[str, Any]], errors: List[str]) -> None: |
| 53 | + workspace = Path(WORKSPACE_DIR) |
| 54 | + if not workspace.exists(): |
| 55 | + errors.append(f"Workspace root does not exist: {workspace}") |
| 56 | + _record_check(checks, "workspace_root_exists", False, str(workspace)) |
| 57 | + return |
| 58 | + |
| 59 | + _record_check(checks, "workspace_root_exists", True, str(workspace)) |
| 60 | + |
| 61 | + if not os.access(workspace, os.W_OK): |
| 62 | + errors.append(f"Workspace root is not writable: {workspace}") |
| 63 | + _record_check(checks, "workspace_root_writable", False, str(workspace)) |
| 64 | + return |
| 65 | + |
| 66 | + probe_file = workspace / ".startup_health_write_probe" |
| 67 | + try: |
| 68 | + probe_file.write_text("ok", encoding="utf-8") |
| 69 | + probe_file.unlink(missing_ok=True) |
| 70 | + _record_check(checks, "workspace_root_writable", True, "write probe passed") |
| 71 | + except Exception as exc: |
| 72 | + errors.append(f"Workspace root write probe failed: {exc}") |
| 73 | + _record_check(checks, "workspace_root_writable", False, f"write probe failed: {exc}") |
| 74 | + |
| 75 | + |
| 76 | +def _check_schema_for_user(user_id: str, checks: List[Dict[str, Any]], errors: List[str]) -> None: |
| 77 | + engine = get_engine_for_user(user_id) |
| 78 | + inspector = inspect(engine) |
| 79 | + |
| 80 | + for table, columns in _REQUIRED_SCHEMA.items(): |
| 81 | + if not inspector.has_table(table): |
| 82 | + errors.append(f"Missing required table '{table}' in tenant DB for user '{user_id}'") |
| 83 | + _record_check(checks, f"schema_{table}", False, f"table missing for {user_id}") |
| 84 | + continue |
| 85 | + |
| 86 | + existing_columns = {col["name"] for col in inspector.get_columns(table)} |
| 87 | + missing_columns = [col for col in columns if col not in existing_columns] |
| 88 | + if missing_columns: |
| 89 | + errors.append( |
| 90 | + f"Missing required columns in '{table}' for user '{user_id}': {', '.join(missing_columns)}" |
| 91 | + ) |
| 92 | + _record_check( |
| 93 | + checks, |
| 94 | + f"schema_{table}", |
| 95 | + False, |
| 96 | + f"missing columns for {user_id}: {', '.join(missing_columns)}", |
| 97 | + ) |
| 98 | + else: |
| 99 | + _record_check(checks, f"schema_{table}", True, f"schema ok for {user_id}") |
| 100 | + |
| 101 | + |
| 102 | +def _check_db_access(checks: List[Dict[str, Any]], errors: List[str], warnings: List[str]) -> Optional[str]: |
| 103 | + if default_engine is not None: |
| 104 | + try: |
| 105 | + init_database() |
| 106 | + with default_engine.connect() as conn: |
| 107 | + conn.execute(text("SELECT 1")) |
| 108 | + _record_check(checks, "single_tenant_db_connectivity", True, "SELECT 1 succeeded") |
| 109 | + return "single_tenant" |
| 110 | + except Exception as exc: |
| 111 | + errors.append(f"Single-tenant database check failed: {exc}") |
| 112 | + _record_check(checks, "single_tenant_db_connectivity", False, str(exc)) |
| 113 | + return None |
| 114 | + |
| 115 | + user_ids = get_all_user_ids() |
| 116 | + candidate_user = user_ids[0] if user_ids else "startup_synthetic" |
| 117 | + |
| 118 | + try: |
| 119 | + db_path = get_user_db_path(candidate_user) |
| 120 | + _record_check(checks, "tenant_db_path_resolution", True, f"{candidate_user} -> {db_path}") |
| 121 | + except Exception as exc: |
| 122 | + errors.append(f"Tenant DB path resolution failed: {exc}") |
| 123 | + _record_check(checks, "tenant_db_path_resolution", False, str(exc)) |
| 124 | + return None |
| 125 | + |
| 126 | + try: |
| 127 | + session = get_session_for_user(candidate_user) |
| 128 | + if not session: |
| 129 | + raise RuntimeError("session creation returned None") |
| 130 | + session.execute(text("SELECT 1")) |
| 131 | + _record_check(checks, "tenant_session_create", True, f"session opened for {candidate_user}") |
| 132 | + session.close() |
| 133 | + except Exception as exc: |
| 134 | + errors.append(f"Tenant DB open/create check failed for '{candidate_user}': {exc}") |
| 135 | + _record_check(checks, "tenant_session_create", False, str(exc)) |
| 136 | + return None |
| 137 | + |
| 138 | + if not user_ids: |
| 139 | + warnings.append( |
| 140 | + "No existing tenant workspace found during startup; synthetic tenant DB path was used for readiness validation." |
| 141 | + ) |
| 142 | + |
| 143 | + _check_schema_for_user(candidate_user, checks, errors) |
| 144 | + return candidate_user |
| 145 | + |
| 146 | + |
| 147 | +def run_startup_health_routine() -> Dict[str, Any]: |
| 148 | + checks: List[Dict[str, Any]] = [] |
| 149 | + errors: List[str] = [] |
| 150 | + warnings: List[str] = [] |
| 151 | + |
| 152 | + _check_workspace_root(checks, errors) |
| 153 | + if not errors: |
| 154 | + _check_db_access(checks, errors, warnings) |
| 155 | + |
| 156 | + status = "healthy" if not errors else "failed" |
| 157 | + report = { |
| 158 | + "status": status, |
| 159 | + "mode": "multi_tenant" if default_engine is None else "single_tenant", |
| 160 | + "checks": checks, |
| 161 | + "errors": errors, |
| 162 | + "warnings": warnings, |
| 163 | + "checked_at": datetime.now(timezone.utc).isoformat(), |
| 164 | + } |
| 165 | + |
| 166 | + _STARTUP_STATUS.update(report) |
| 167 | + |
| 168 | + if errors: |
| 169 | + for message in errors: |
| 170 | + logger.error(f"Startup readiness check failed: {message}") |
| 171 | + for warning in warnings: |
| 172 | + logger.warning(f"Startup readiness warning: {warning}") |
| 173 | + |
| 174 | + if errors and should_fail_fast(): |
| 175 | + raise RuntimeError("Startup readiness checks failed") |
| 176 | + |
| 177 | + return report |
| 178 | + |
| 179 | + |
| 180 | +def get_startup_status() -> Dict[str, Any]: |
| 181 | + return dict(_STARTUP_STATUS) |
| 182 | + |
| 183 | + |
| 184 | +def readiness_under_auth_context(current_user: Dict[str, Any]) -> Dict[str, Any]: |
| 185 | + user_id = (current_user or {}).get("id") or (current_user or {}).get("clerk_user_id") |
| 186 | + if not user_id: |
| 187 | + return { |
| 188 | + "ready": False, |
| 189 | + "reason": "missing_user_context", |
| 190 | + "detail": "No authenticated user id was provided in auth context.", |
| 191 | + } |
| 192 | + |
| 193 | + try: |
| 194 | + db_path = get_user_db_path(user_id) |
| 195 | + session = get_session_for_user(user_id) |
| 196 | + if not session: |
| 197 | + raise RuntimeError("Session creation returned None") |
| 198 | + session.execute(text("SELECT 1")) |
| 199 | + session.close() |
| 200 | + return { |
| 201 | + "ready": True, |
| 202 | + "user_id": user_id, |
| 203 | + "tenant_db_path": db_path, |
| 204 | + "db_session": "ok", |
| 205 | + } |
| 206 | + except Exception as exc: |
| 207 | + logger.error(f"Readiness auth-context DB check failed for user '{user_id}': {exc}") |
| 208 | + return { |
| 209 | + "ready": False, |
| 210 | + "user_id": user_id, |
| 211 | + "tenant_db_path": get_user_db_path(user_id), |
| 212 | + "db_session": "failed", |
| 213 | + "reason": str(exc), |
| 214 | + } |
0 commit comments