Skip to content

Commit

Permalink
Add Docker Compose YAML files for local OIH deployment
Browse files Browse the repository at this point in the history
Introduced several Docker Compose YAML files to support local deployment and configuration of OIH, enabling easier local testing and environment management. The new files include various configurations for different local testing setups without external routing.
  • Loading branch information
fils committed Nov 6, 2024
1 parent bc89b48 commit 99f5891
Show file tree
Hide file tree
Showing 6 changed files with 1,202 additions and 0 deletions.
304 changes: 304 additions & 0 deletions dagster/implnets/deployment/oih/compose_local.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
# version: "3.9"

# networks: production uses traefik_proxy, buth
# this is local testing
networks:
traefik_proxy:
# driver: bridge
headless:
external: true
name: ${GLEANERIO_HEADLESS_NETWORK:-headless_gleanerio}
volumes:
dagster-postgres:
driver: local
# dagster-storage:
# driver: local
# let yourself use local configuration
configs:
# gleaner:
# name: ${GLEANERIO_DOCKER_GLEANER_CONFIG:-gleaner}
# file: ../configs/${PROJECT:-eco}/gleanerconfig.yaml
# nabu:
# name: ${GLEANERIO_DOCKER_NABU_CONFIG:-nabu}
# file: ../configs/${PROJECT:-eco}/nabuconfig.yaml
workspace:
name: ${GLEANERIO_WORKSPACE_DOCKER_CONFIG:-workspace}
file: ../configs/${PROJECT:-eco}/workspace.yaml
# DO NOT START IN PYCHARM (unless you set the env variables in your runconfig
# USE:
# ./docker_localstart.sh
services:
dagster-dagit:
# platform: linux/x86_64
# build:
# #context: .
# context: ..
# dockerfile: build/Dockerfile_dagster
# args:
# implnet: ${PROJECT:-eco}
# you should be able to change the source locally, without a full rebuild.
#image: dagster-${PROJECT:-eco}:latest
image: dagster-local:latest
command: 'dagster-webserver -w ${GLEANERIO_WORKSPACE_CONFIG_PATH:-/usr/src/app/workspace.yaml} -h "0.0.0.0" -p 3000'
configs: &configs
- source: workspace
target: /usr/src/app/workspace.yaml
# uid: "103"
# gid: "103"
mode: 0444
# - source: gleaner
# target: /scheduler/gleanerconfig.yaml
# mode:
# 0444
# - source: nabu
# target: /scheduler/nabuconfig.yaml
# # uid: "103"
# # gid: "103"
# mode:
# 044
volumes: &vol
- ../deployment/dagster.yaml:/usr/src/app/dagster.yaml
# - ../generatedCode/implnet-${PROJECT:-eco}/output/:/usr/src/app/project/${PROJECT:-eco}
- ../workflows/:/usr/src/app/workflows
# GLEANEERIO_ the environment variables for this stack, passed into containers
# the variables passed into the containers varies due to inconsistent standards.
# this there are prefixed by project aka ECO_ for customization
# DO NOT RENAME THE FIRST PART, aka the container environment variable,
# unless you sure what you are doing
# sort these in BBedit to make finding them easier
environment: &env
- DEBUG_CONTAINER=${DEBUG_CONTAINER:-false}
- GLEANERIO_CONFIG_PATH=${GLEANERIO_CONFIG_PATH:-scheduler/configs/test/}
- GLEANERIO_DAGSTER_CONFIG_PATH=${GLEANERIO_DAGSTER_CONFIG_PATH:-scheduler/logs/}
- GLEANERIO_DOCKER_CONTAINER_WAIT_TIMEOUT=${GLEANERIO_DOCKER_CONTAINER_WAIT_TIMEOUT:-300}
- GLEANERIO_DOCKER_GLEANER_CONFIG=${GLEANERIO_DOCKER_GLEANER_CONFIG:-gleaner}
- GLEANERIO_DOCKER_HEADLESS_NETWORK=${GLEANERIO_DOCKER_HEADLESS_NETWORK}
- GLEANERIO_DOCKER_NABU_CONFIG=${GLEANERIO_DOCKER_NABU_CONFIG:-nabu}
- GLEANERIO_DOCKER_URL=${GLEANERIO_DOCKER_URL}
- GLEANERIO_DOCKER_WORKSPACE_CONFIG=${GLEANERIO_DOCKER_WORKSPACE_CONFIG}
- GLEANERIO_GLEANER_CONFIG_PATH=${GLEANERIO_GLEANER_CONFIG_PATH:-/configs/gleaner/gleanerconfig.yaml}
- GLEANERIO_GLEANER_IMAGE=${GLEANERIO_GLEANER_IMAGE}
- GLEANERIO_GRAPH_NAMESPACE=${GLEANERIO_GRAPH_NAMESPACE}
- GLEANERIO_GRAPH_SUMMARIZE=${GLEANERIO_GRAPH_SUMMARIZE:-false}
- GLEANERIO_GRAPH_SUMMARY_ENDPOINT=${GLEANERIO_GRAPH_SUMMARY_ENDPOINT:-${GLEANERIO_GRAPH_URL}}
- GLEANERIO_GRAPH_SUMMARY_NAMESPACE=${GLEANERIO_GRAPH_SUMMARY_NAMESPACE}
- GLEANERIO_GRAPH_URL=${GLEANERIO_GRAPH_URL}
- GLEANERIO_HEADLESS_ENDPOINT=${GLEANERIO_HEADLESS_ENDPOINT}
- GLEANERIO_LOG_PREFIX=${GLEANERIO_LOG_PREFIX:-scheduler/logs/}
- GLEANERIO_MINIO_ACCESS_KEY=${GLEANERIO_MINIO_ACCESS_KEY}
- GLEANERIO_MINIO_ADDRESS=${GLEANERIO_MINIO_ADDRESS}
- GLEANERIO_MINIO_BUCKET=${GLEANERIO_MINIO_BUCKET}
- GLEANERIO_MINIO_PORT=${GLEANERIO_MINIO_PORT}
- GLEANERIO_MINIO_SECRET_KEY=${GLEANERIO_MINIO_SECRET_KEY}
- GLEANERIO_MINIO_USE_SSL=${GLEANERIO_MINIO_USE_SSL}
- GLEANERIO_NABU_CONFIG_PATH=${GLEANERIO_NABU_CONFIG_PATH:-/configs/gleaner/nabuconfig.yaml}
- GLEANERIO_NABU_IMAGE=${GLEANERIO_NABU_IMAGE}
- GLEANERIO_PORTAINER_APIKEY=${GLEANERIO_PORTAINER_APIKEY}
- GLEANERIO_DEFAULT_SCHEDULE=${GLEANERIO_DEFAULT_SCHEDULE:-@weekly}
- GLEANERIO_DEFAULT_SCHEDULE_TIMEZONE=${GLEANERIO_DEFAULT_SCHEDULE_TIMEZONE:-"America/Los_Angeles"}
- GLEANERIO_SOURCES_FILENAME=${GLEANERIO_SOURCES_FILENAME:-gleanerconfig.yaml}
- GLEANERIO_TENANT_FILENAME=${GLEANERIO_TENANT_FILENAME:-tenant.yaml}
- GLEANERIO_WORKSPACE_CONFIG_PATH=${GLEANERIO_WORKSPACE_CONFIG_PATH}
- PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
- SLACK_CHANNEL=${SLACK_CHANNEL:-"#twitterfeed"}
- SLACK_TOKEN=${SLACK_TOKEN}

ports:
- 3000:3000
networks:
- traefik_proxy
depends_on:
- dagster-postgres
# - dagster-code-project
- dagster-code-ingest
- dagster-code-tasks
labels:
- "traefik.enable=true"
- "traefik.http.routers.sched.entrypoints=http"
- "traefik.http.routers.sched.priority=13"
- "traefik.http.routers.sched.rule=Host(`localhost`)"
- "traefik.http.middlewares.sched-https-redirect.redirectscheme.scheme=https"
- "traefik.http.routers.sched.middlewares=sched-https-redirect"
- "traefik.http.routers.sched-secure.entrypoints=https"
- "traefik.http.routers.sched-secure.priority=12"
- "traefik.http.routers.sched-secure.rule=Host(`localhost`)"
- "traefik.http.routers.sched-secure.tls=true"
- "traefik.http.routers.sched-secure.tls.certresolver=httpresolver"
- "traefik.http.routers.sched-secure.service=sched"
- "traefik.http.services.sched.loadbalancer.server.port=3000"
- "traefik.docker.network=traefik_proxy"
- "traefik.http.middlewares.sched.headers.accesscontrolallowmethods=GET,OPTIONS,POST"
- "traefik.http.middlewares.sched.headers.accessControlAllowOriginList=*"
- "traefik.http.middlewares.sched.headers.accesscontrolmaxage=100"
- "traefik.http.middlewares.sched.headers.addvaryheader=true"
dagster-daemon:
platform: linux/x86_64
# build:
# #context: .
# context: ..
# dockerfile: build/Dockerfile_dagster
# args:
# implnet: ${PROJECT:-eco}
# you should be able to change the source locally, without a full rebuild.
#image: dagster-${PROJECT:-eco}:latest
image: dagster-local:latest
configs: *configs
environment: *env
command: "dagster-daemon run -w ${GLEANERIO_WORKSPACE_CONFIG_PATH:-/usr/src/app/workspace.yaml}"
volumes: *vol
depends_on:
- dagster-postgres
# - dagster-code-project
- dagster-code-ingest
- dagster-code-tasks
networks:
- traefik_proxy
dagster-postgres:
image: postgres:13.3
ports:
- 5432:5432
environment:
- POSTGRES_PASSWORD=secret
volumes:
- dagster-postgres:/var/lib/postgresql/data
networks:
- traefik_proxy
headless:
platform: linux/x86_64
# image: chromedp/headless-shell:stable
# stable after 105 causes "devtool: CreateURL: Using unsafe HTTP verb GET to invoke /json/new. This action supports only PUT verb.",
image: chromedp/headless-shell:105.0.5195.127
# build:
# context: .
# shm_size: "2gb"
restart: unless-stopped
shm_size: 265mb
labels:
- "traefik.enable=false"
- "traefik.backend=headlesschrome"
- "traefik.port=9222"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}.entrypoints=http"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}.priority=13"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}.rule=Host(`headlesschrome.${HOST? HOST is required}`)"
- "traefik.http.middlewares.headlesschrome-https-redirect.redirectscheme.scheme=https"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}.middlewares=sched-https-redirect"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.entrypoints=https"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.priority=12"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.rule=Host(`headlesschrome.${HOST? HOST is required}`)"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.tls=true"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.tls.certresolver=httpresolver"
- "traefik.http.routers.headlesschrome-${PROJECT:-eco}-secure.service=headlesschrome-${PROJECT}"
- "traefik.http.services.sched-${PROJECT:-eco}.loadbalancer.server.port=9222"
- "traefik.docker.network=traefik_proxy"
ports:
- 9222:9222
environment:
- SERVICE_PORTS=9222
# method to get a bigger shm space, since shm_size does not work on a swarm
tmpfs:
- /tmp:size=256M
volumes:
- type: tmpfs
target: /dev/shm

# adding the "bridge" network does not work to give access to the 'gleaner and nabu' containers started
# by this process. There is no name resolution on that network.
networks:
- traefik_proxy
- headless
# in code, use names defined in network above
dagster-code-ingest:
platform: linux/x86_64
# build:
# #context: .
# context: ..
# dockerfile: build/Dockerfile_workflows
# args:
# implnet: ${PROJECT:-eco}
# you should be able to change the source locally, without a full rebuild.
#image: dagster-${PROJECT:-eco}:latest
image: dagster-gleanerio-local:latest

environment: *env
command:
- "dagster"
- "api"
- "grpc"
- "-h"
- "0.0.0.0"
- "-p"
- "4000"
- "-m"
- "workflows.ingest.ingest"
- "-d"
- "/usr/src/app/"

volumes: *vol
depends_on:
- dagster-postgres
networks:
- traefik_proxy

dagster-code-tasks:
platform: linux/x86_64
# build:
# #context: .
# context: ..
# dockerfile: build/Dockerfile_workflows
# args:
# implnet: ${PROJECT:-eco}
# you should be able to change the source locally, without a full rebuild.
#image: dagster-${PROJECT:-eco}:latest
image: dagster-gleanerio-local:latest

environment: *env
command:
- "dagster"
- "api"
- "grpc"
- "-h"
- "0.0.0.0"
- "-p"
- "4000"
- "-m"
- "workflows.tasks.tasks"
- "-d"
- "/usr/src/app/"

volumes: *vol
depends_on:
- dagster-postgres
networks:
- traefik_proxy
# dagster-code-project:
#
# platform: linux/x86_64
# build:
# #context: .
# context: ..
# dockerfile: build/Dockerfile_code
# args:
# implnet: ${PROJECT:-eco}
# # you should be able to change the source locally, without a full rebuild.
# #image: dagster-${PROJECT:-eco}:latest
# image: dagster-gleanerio-local:latest
#
# environment: *env
# command:
# - "dagster"
# - "api"
# - "grpc"
# - "-h"
# - "0.0.0.0"
# - "-p"
# - "4000"
# - "--python-file"
# - "/usr/src/app/project/${PROJECT:-eco}/repositories/repository.py"
# - "-d"
# - "/usr/src/app/project/${PROJECT:-eco}/"
#
# volumes: *vol
# depends_on:
# - dagster-postgres
# networks:
# - traefik_proxy
Loading

0 comments on commit 99f5891

Please sign in to comment.