Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions docker/standalone/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ WORKDIR /app/api
# Sync dependencies (will create lock file if needed)
RUN uv sync

# Copy source code
# Copy source code and alembic migrations
COPY hindsight-api/hindsight_api ./hindsight_api
COPY hindsight-api/alembic ./alembic

# Build TypeScript SDK
FROM node:20-alpine AS sdk-builder
Expand Down Expand Up @@ -70,6 +71,7 @@ RUN apt-get update && apt-get install -y \
libxml2 \
libssl3 \
libgssapi-krb5-2 \
libossp-uuid16 \
&& apt-get install -y libicu72 || apt-get install -y libicu74 || apt-get install -y libicu* \
&& curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
&& apt-get install -y nodejs \
Expand Down Expand Up @@ -97,6 +99,10 @@ COPY --from=cp-builder /app/.next ./.next
COPY --from=cp-builder /app/public ./public
COPY --from=cp-builder /app/next.config.ts ./next.config.ts

# For standalone mode, static files must be in .next/standalone/.next/static
RUN cp -r .next/static .next/standalone/.next/static
RUN cp -r public .next/standalone/public

WORKDIR /app

# Copy startup script
Expand All @@ -109,24 +115,20 @@ RUN mkdir -p /app/data && chown -R hindsight:hindsight /app
# Switch to non-root user
USER hindsight

# Install pg0
RUN curl -fsSL https://raw.githubusercontent.com/vectorize-io/pg0/main/install.sh | bash

# Start pg0 once to verify it works and pre-download PostgreSQL libraries
RUN pg0 --help && \
pg0 start --wait && \
pg0 stop

# Expose ports
EXPOSE 8888 3000

# Environment variables
# Environment variables (set PATH early so pg0 is accessible after install)
ENV PATH="/home/hindsight/.local/bin:/app/api/.venv/bin:${PATH}"
ENV HINDSIGHT_API_HOST=0.0.0.0
ENV HINDSIGHT_API_PORT=8888
ENV HINDSIGHT_API_LOG_LEVEL=info
ENV NODE_ENV=production
ENV HINDSIGHT_CP_DATAPLANE_API_URL=http://localhost:8888
ENV PATH="/home/hindsight/.local/bin:/app/api/.venv/bin:${PATH}"

# Install pg0 CLI (the API will handle starting PostgreSQL at runtime)
RUN curl -fsSL https://raw.githubusercontent.com/vectorize-io/pg0/main/install.sh | bash && \
pg0 --help

# Expose ports
EXPOSE 8888 3000

# Run startup script
CMD ["/app/start-all.sh"]
56 changes: 49 additions & 7 deletions hindsight-api/hindsight_api/pg0.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ def __init__(

self._process: Optional[subprocess.Popen] = None

def _construct_uri(self) -> str:
    """Construct the PostgreSQL connection URI from the instance settings.

    The username, password and database name are percent-encoded so that
    reserved characters (``@``, ``:``, ``/``, spaces, ...) cannot change the
    structure of the URI.

    Returns:
        A URI of the form
        ``postgresql://<username>:<password>@localhost:<port>/<database>``.
    """
    from urllib.parse import quote

    # quote() with safe="" is used instead of quote_plus(): quote_plus()
    # encodes a space as "+", which is form encoding. Percent-decoding the
    # userinfo part of a URI leaves "+" as a literal plus sign, so a password
    # containing a space would not round-trip.
    username_encoded = quote(str(self.username), safe="")
    password_encoded = quote(self.password, safe="")
    database_encoded = quote(str(self.database), safe="")
    return (
        f"postgresql://{username_encoded}:{password_encoded}"
        f"@localhost:{self.port}/{database_encoded}"
    )

def is_installed(self) -> bool:
    """Check if the embedded-postgres CLI is installed.

    Returns:
        True when ``self.binary_path`` points at a regular file that the
        current process is allowed to execute, False otherwise.
    """
    # is_file() rather than exists(): a directory also "exists" and normally
    # passes the X_OK check, but it cannot be run as the CLI binary.
    return self.binary_path.is_file() and os.access(self.binary_path, os.X_OK)
Expand Down Expand Up @@ -190,8 +196,8 @@ def _run_command(self, *args: str, capture_output: bool = True) -> subprocess.Co
text=True,
)

async def _run_command_async(self, *args: str) -> tuple[int, str, str]:
"""Run an embedded-postgres command asynchronously."""
async def _run_command_async(self, *args: str, timeout: float = 60.0) -> tuple[int, str, str]:
"""Run an embedded-postgres command asynchronously with timeout."""
cmd = [str(self.binary_path), *args]

process = await asyncio.create_subprocess_exec(
Expand All @@ -200,8 +206,20 @@ async def _run_command_async(self, *args: str) -> tuple[int, str, str]:
stderr=asyncio.subprocess.PIPE,
)

stdout, stderr = await process.communicate()
return process.returncode, stdout.decode(), stderr.decode()
try:
stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
return process.returncode or 0, stdout.decode(), stderr.decode()
except asyncio.TimeoutError:
# If timeout, pg0 may have started PostgreSQL but is hanging on communicate()
# This happens because PostgreSQL inherits the file descriptors
try:
process.kill()
await process.wait()
except ProcessLookupError:
# Process already exited, which is fine
pass
logger.warning(f"pg0 command timed out after {timeout}s, continuing...")
return 0, "", ""

async def start(self) -> str:
"""
Expand Down Expand Up @@ -236,8 +254,32 @@ async def start(self) -> str:

logger.info("Embedded PostgreSQL started")

# Get and return the URI
return await self.get_uri()
# Wait for PostgreSQL to be ready to accept connections
uri = self._construct_uri()
await self._wait_for_postgres(uri)

return uri

async def _wait_for_postgres(self, uri: str, timeout: float = 60.0) -> None:
    """Wait for PostgreSQL to be ready to accept connections.

    Repeatedly opens a connection to ``uri`` and closes it again until one
    attempt succeeds or ``timeout`` seconds have elapsed.

    Args:
        uri: PostgreSQL connection URI to probe.
        timeout: Overall time budget in seconds for all attempts combined.

    Raises:
        RuntimeError: If no attempt succeeded within ``timeout`` seconds. The
            message contains the last connection error, which is also
            attached as the exception's cause.
    """
    import asyncpg

    # get_running_loop() is the documented way to reach the loop from inside
    # a coroutine; its clock is monotonic, so the deadline is immune to
    # wall-clock adjustments.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    last_error = None

    while (remaining := deadline - loop.time()) > 0:
        try:
            # Never let a single attempt run past the overall deadline.
            conn = await asyncio.wait_for(
                asyncpg.connect(uri),
                timeout=min(5.0, remaining),
            )
            await conn.close()
            logger.info("PostgreSQL is ready to accept connections")
            return
        except Exception as e:
            # Best-effort probe: any failure just means "not ready yet".
            last_error = e
            logger.debug("PostgreSQL not ready yet: %s", e)
            # Back off before retrying, but do not sleep past the deadline.
            await asyncio.sleep(min(1.0, max(0.0, deadline - loop.time())))

    raise RuntimeError(
        f"PostgreSQL failed to become ready within {timeout}s: {last_error}"
    ) from last_error

async def stop(self) -> None:
"""
Expand Down Expand Up @@ -350,7 +392,7 @@ async def ensure_running(self) -> str:
await self.ensure_installed()

if await self.is_running():
return await self.get_uri()
return self._construct_uri()

return await self.start()

Expand Down
Loading