docker-compose.yaml

# Base configuration for Atoma services
x-atoma-node: &atoma-node
  image: ghcr.io/atoma-network/atoma-node:latest
  build:
    context: .
    dockerfile: Dockerfile
  platform: ${PLATFORM:-} # Will be empty if not set
  runtime: nvidia
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
  volumes:
    - ${CONFIG_PATH:-./config.toml}:/app/config.toml
    - atoma-local-key:/app/local_key
    - ./logs:/app/logs
    - sui-config-volume:/root/.sui/sui_config
    - ${SUI_CONFIG_PATH:-~/.sui/sui_config}:/tmp/.sui/sui_config
    - ./data:/app/data
  env_file: .env
  networks:
    - atoma-network

# Base configuration for cuda inference services
x-inference-service-cuda: &inference-service-cuda
  runtime: nvidia
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
  volumes:
    - ${HF_CACHE_PATH:-~/.cache/huggingface}:/root/.cache/huggingface
  env_file: .env
  networks:
    - atoma-network

# Base configuration for cpu inference services
x-inference-service-cpu: &inference-service-cpu
  volumes:
    - ${HF_CACHE_PATH:-~/.cache/huggingface}:/root/.cache/huggingface
  env_file: .env
  networks:
    - atoma-network

# Base configuration for ROCm inference services
x-inference-service-rocm: &inference-service-rocm
  runtime: rocm
  devices:
    - all
  volumes:
    - ${HF_CACHE_PATH:-~/.cache/huggingface}:/root/.cache/huggingface
  env_file: .env
  networks:
    - atoma-network

services:
  postgres-db:
    image: postgres:13
    platform: ${PLATFORM:-} # Will be empty if not set
    restart: always
    environment:
      - POSTGRES_DB=${POSTGRES_DB}
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    ports:
      - "${POSTGRES_PORT:-5432}:5432"
    env_file: .env
    networks:
      - atoma-network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB} -p 5432 || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  prometheus:
    image: prom/prometheus:v3.1.0
    platform: ${PLATFORM:-}
    restart: always
    ports:
      - "${PROMETHEUS_PORT:-9090}:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.enable-lifecycle"
    env_file: .env
    networks:
      - atoma-network
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9090/-/healthy"]
      interval: 10s
      timeout: 5s
      retries: 5

  grafana:
    image: grafana/grafana:11.5.1
    platform: ${PLATFORM:-}
    restart: always
    ports:
      - "${GRAFANA_PORT:-30001}:3000"
    depends_on:
      - prometheus
    volumes:
      - grafana_data:/var/lib/grafana
    env_file: .env
    environment:
      - GF_SERVER_ROOT_URL=http://${GRAFANA_DOMAIN}:${GRAFANA_PORT:-30001}/
    networks:
      - atoma-network
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3000/api/health || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  loki:
    image: grafana/loki:2.9.4
    platform: ${PLATFORM:-}
    ports:
      - "3100:3100"
    volumes:
      - ./loki.yaml:/etc/loki/loki.yaml
      - loki-data:/loki
    command: -config.file=/etc/loki/loki.yaml
    networks:
      - atoma-network
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3100/ready || exit 1"]
      interval: 10s
      timeout: 5s
      retries: 5

  tempo:
    image: grafana/tempo:2.7.0
    platform: ${PLATFORM:-}
    command: ["-config.file=/etc/tempo.yaml"]
    volumes:
      - ./tempo.yaml:/etc/tempo.yaml
      - tempo-data:/tmp/tempo
    ports:
      - "3200:3200"
    networks:
      - atoma-network
    user: "0"
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3200/status"]
      interval: 10s
      timeout: 5s
      retries: 5

  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.119.0
    platform: ${PLATFORM:-}
    command: ["--config=/etc/otel-collector-config.yaml"]
    volumes:
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
    ports:
      - "4317:4317" # OTLP gRPC
      - "4318:4318" # OTLP HTTP
      - "8889:8889" # Prometheus exporter
    networks:
      - atoma-network
    depends_on:
      prometheus:
        condition: service_healthy
        required: true
      grafana:
        condition: service_healthy
        required: true
      loki:
        condition: service_healthy
        required: true
      tempo:
        condition: service_healthy
        required: true

  atoma-node:
    <<: *atoma-node
    ports:
      - "${ATOMA_SERVICE_PORT:-3000}:3000"
      - "127.0.0.1:${ATOMA_DAEMON_PORT:-3001}:3001"
      - "${ATOMA_P2P_PORT:-4001}:4001/udp"
      - "${ATOMA_P2P_PORT:-4001}:4001/tcp"
    depends_on:
      postgres-db:
        condition: service_started
        required: true
      vllm:
        condition: service_started
        required: false
      vllm-cpu:
        condition: service_started
        required: false
      vllm-rocm:
        condition: service_started
        required: false
      mistralrs-cpu:
        condition: service_started
        required: false
      tei:
        condition: service_started
        required: false
      mistralrs:
        condition: service_started
        required: false
      otel-collector:
        condition: service_started
        required: true

  vllm:
    <<: *inference-service-cuda
    container_name: chat-completions
    profiles: [chat_completions_vllm]
    image: vllm/vllm-openai:v0.7.3
    environment:
      # Backend for attention computation
      # Available options:
      # - "TORCH_SDPA": use torch.nn.MultiheadAttention
      # - "FLASH_ATTN": use FlashAttention
      # - "XFORMERS": use XFormers
      # - "ROCM_FLASH": use ROCmFlashAttention
      # - "FLASHINFER": use flashinfer (recommended for fp8 quantized models)
      - VLLM_ATTENTION_BACKEND=FLASH_ATTN
    ipc: host
    command: ${VLLM_ENGINE_ARGS}

  vllm-cpu:
    <<: *inference-service-cpu
    container_name: chat-completions
    profiles: [chat_completions_vllm_cpu]
    build:
      context: https://github.com/atoma-network/vllm.git#main
      dockerfile: Dockerfile.cpu
    command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN}

  vllm-rocm:
    <<: *inference-service-rocm
    container_name: chat-completions
    profiles: [chat_completions_vllm_rocm]
    build:
      context: https://github.com/atoma-network/vllm.git#main
      dockerfile: Dockerfile.rocm
    command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN} --tensor-parallel-size ${VLLM_TENSOR_PARALLEL_SIZE}

  mistralrs-cpu:
    <<: *inference-service-cpu
    container_name: chat-completions
    profiles: [chat_completions_mistralrs_cpu]
    image: ghcr.io/ericlbuehler/mistral.rs:cpu-0.4
    platform: linux/amd64
    command: plain -m ${CHAT_COMPLETIONS_MODEL}

  tei:
    <<: *inference-service-cuda
    container_name: embeddings
    profiles: [embeddings_tei]
    image: ${TEI_IMAGE}
    command: --model-id ${EMBEDDINGS_MODEL} --huggingface-hub-cache /root/.cache/huggingface/hub

  mistralrs:
    <<: *inference-service-cuda
    container_name: image-generations
    profiles: [image_generations_mistralrs]
    image: ${MISTRALRS_IMAGE}
    platform: ${PLATFORM:-}
    command: diffusion-plain -m ${IMAGE_GENERATIONS_MODEL} --arch ${IMAGE_GENERATIONS_ARCHITECTURE}

networks:
  atoma-network:
    driver: bridge
    name: atoma-network

volumes:
  postgres-data:
  grafana_data:
  sui-config-volume:
  prometheus-data:
  loki-data:
  tempo-data:
  atoma-local-key: