diff --git a/docker/standalone/Dockerfile b/docker/standalone/Dockerfile index ff247315a..def380102 100644 --- a/docker/standalone/Dockerfile +++ b/docker/standalone/Dockerfile @@ -21,8 +21,9 @@ WORKDIR /app/api # Sync dependencies (will create lock file if needed) RUN uv sync -# Copy source code +# Copy source code and alembic migrations COPY hindsight-api/hindsight_api ./hindsight_api +COPY hindsight-api/alembic ./alembic # Build TypeScript SDK FROM node:20-alpine AS sdk-builder @@ -70,6 +71,7 @@ RUN apt-get update && apt-get install -y \ libxml2 \ libssl3 \ libgssapi-krb5-2 \ + libossp-uuid16 \ && apt-get install -y libicu72 || apt-get install -y libicu74 || apt-get install -y libicu* \ && curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \ && apt-get install -y nodejs \ @@ -97,6 +99,10 @@ COPY --from=cp-builder /app/.next ./.next COPY --from=cp-builder /app/public ./public COPY --from=cp-builder /app/next.config.ts ./next.config.ts +# For standalone mode, static files must be in .next/standalone/.next/static +RUN cp -r .next/static .next/standalone/.next/static +RUN cp -r public .next/standalone/public + WORKDIR /app # Copy startup script @@ -109,24 +115,20 @@ RUN mkdir -p /app/data && chown -R hindsight:hindsight /app # Switch to non-root user USER hindsight -# Install pg0 -RUN curl -fsSL https://raw.githubusercontent.com/vectorize-io/pg0/main/install.sh | bash - -# Start pg0 once to verify it works and pre-download PostgreSQL libraries -RUN pg0 --help && \ - pg0 start --wait && \ - pg0 stop - -# Expose ports -EXPOSE 8888 3000 - -# Environment variables +# Environment variables (set PATH early so pg0 is accessible after install) +ENV PATH="/home/hindsight/.local/bin:/app/api/.venv/bin:${PATH}" ENV HINDSIGHT_API_HOST=0.0.0.0 ENV HINDSIGHT_API_PORT=8888 ENV HINDSIGHT_API_LOG_LEVEL=info ENV NODE_ENV=production ENV HINDSIGHT_CP_DATAPLANE_API_URL=http://localhost:8888 -ENV PATH="/home/hindsight/.local/bin:/app/api/.venv/bin:${PATH}" + +# Install pg0 CLI 
def _construct_uri(self) -> str:
    """Construct the PostgreSQL connection URI from instance settings.

    The username, password and database name are percent-encoded with
    ``quote(..., safe="")`` so that reserved characters (``@``, ``:``,
    ``/``, spaces, ...) cannot break the structure of the URI.

    ``quote_plus`` is deliberately not used: it encodes a space as ``+``,
    which is an HTML-form convention. Consumers that apply plain
    percent-decoding to the userinfo part keep that ``+`` as a literal
    plus sign, so a password containing a space would not round-trip.

    Returns:
        A URI of the form
        ``postgresql://<user>:<password>@localhost:<port>/<database>``.
    """
    from urllib.parse import quote

    # str() keeps the original f-string tolerance for non-str settings.
    user_encoded = quote(str(self.username), safe="")
    password_encoded = quote(self.password, safe="")
    database_encoded = quote(str(self.database), safe="")
    return (
        f"postgresql://{user_encoded}:{password_encoded}"
        f"@localhost:{self.port}/{database_encoded}"
    )
async def _wait_for_postgres(self, uri: str, timeout: float = 60.0) -> None:
    """Wait for PostgreSQL to be ready to accept connections.

    Repeatedly opens a connection to ``uri`` and closes it again. A failed
    attempt is followed by a pause of up to one second before retrying.
    Each attempt is capped at five seconds, and both the attempt and the
    pause are clamped to the time left so the overall ``timeout`` is
    honoured.

    Args:
        uri: PostgreSQL connection URI to probe.
        timeout: Total time budget in seconds across all attempts.

    Raises:
        RuntimeError: If no attempt succeeds within ``timeout`` seconds.
            The last connection error, if any, is attached as the cause.
    """
    import asyncpg

    # Inside a coroutine the running loop is the right clock source.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    last_error = None

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            break
        try:
            conn = await asyncio.wait_for(
                asyncpg.connect(uri),
                timeout=min(5.0, remaining),
            )
            await conn.close()
            logger.info("PostgreSQL is ready to accept connections")
            return
        except Exception as e:
            # Deliberately broad: while the server is still booting, any
            # failure (refused connection, timeout, startup error) just
            # means "not ready yet" and is retried until the deadline.
            last_error = e
            pause = min(1.0, deadline - loop.time())
            if pause > 0:
                await asyncio.sleep(pause)

    raise RuntimeError(
        f"PostgreSQL failed to become ready within {timeout}s: {last_error}"
    ) from last_error