Skip to content

Commit

Permalink
Merge pull request #52 from gleanerio/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
fils authored Nov 3, 2024
2 parents 86cadce + 9ee0855 commit 006ef62
Show file tree
Hide file tree
Showing 1,061 changed files with 6,620 additions and 189,280 deletions.
46 changes: 43 additions & 3 deletions .github/workflows/contanerize.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ on:
branches:
- main
- dev
- dv_dev
- dev_eco
# - dev_dagster142
- v0_generated_code
- 133_dev_sitemaps
- 151-integrate-community-stats-codes
tags:
- "v*.*.*"

Expand Down Expand Up @@ -154,7 +157,44 @@ jobs:
type=ref,event=branch
type=semver,pattern={{version}}
type=sha
build_code_workflows:
name: Dockerize Scheduler Workflows base
runs-on: ubuntu-latest
#strategy:
#matrix:
# project: [ "eco" ]
#project: [ "eco", "iow", "oih" ]
#platform: ["linux/amd64","linux/arm64"]
#platform: ["linux/amd64"] #linux/arm64 issues with building
steps:
- name: Set variables
run: |
REGISTRY_IMAGE=nsfearthcube/dagster-gleanerio-workflows
echo "REGISTRY_IMAGE=$REGISTRY_IMAGE" >> $GITHUB_ENV
working-directory: /
- name: Checkout Repo
uses: actions/checkout@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Login to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY_IMAGE }}
flavor: |
latest=true
tags: |
type=ref,event=tag
type=ref,event=branch
type=semver,pattern={{version}}
type=sha
# - name: Set up Python 3.10
# uses: actions/setup-python@v4
# with:
Expand Down Expand Up @@ -201,7 +241,7 @@ jobs:
build-args:
implnet=${{ matrix.project }}
#file: ./dagster/implnets/build/Dockerfile
file: ./build/Dockerfile_code
file: ./build/Dockerfile_workflows
context: "{{defaultContext}}:dagster/implnets"
tags: ${{ steps.meta.outputs.tags }}
# tags: nsfearthcube/ec_facets_client:latest
Expand Down
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,10 @@ venv/**
/dagster/.telemetry/
/dagster/.telemetry/
.env

/dagster/implnets/generatedCode/implnet-*/output/

/dagster/implnets/deployment/prod.env

**/tmp**
/dagster/dagster_home/
7 changes: 5 additions & 2 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions .idea/scheduler.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Notes

need to do dynamic ops.assets
https://medium.com/@thegreat.rashid83/dagster-sensors-partition-c7a5205d4c0d

## Development

At the top level (dagster/implnets) you can run
Expand Down Expand Up @@ -28,3 +31,11 @@ will run just the task, and in editable form, i think.
## Some articles to review

[Medium on Dagster with configurable API and asset examples](https://medium.com/@alexandreguitton_12701/notes-1-2-dagster-data-orchestrator-hands-on-2af6772b13d9)

## Troubleshooting.
Keep the Python versions in the Dockerfiles in sync; gRPC can be finicky when they differ.

For example, use the same base image in every Dockerfile:

`FROM python:3.11-slim`

3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,8 @@ structured data on the web.

Details of the approach can be found in the [github io](https://earthcube.github.io/scheduler/).

NOTE: Generated-code branch: `v0_generated_code`.
This branch holds the original code, which used a code-generation approach to build the workflows.
When using the original code, make gleaner and nabu config file updates on `v0_generated_code`.


1 change: 1 addition & 0 deletions dagster/dagster_home/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a place where dagster.yamls can be kept for runs
23 changes: 23 additions & 0 deletions dagster/dagster_home/dagster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Dagster instance configuration (dagster.yaml) kept in dagster_home for runs.

local_artifact_storage:
  module: dagster.core.storage.root
  class: LocalArtifactStorage
  config:
    # NOTE(review): machine-specific absolute path committed to the repo;
    # change it to your own dagster_home before using this file.
    # Dagster may accept an `env:` source here instead - TODO confirm.
    base_dir: /Users/valentin/development/dev_earthcube/scheduler/dagster/dagster_home/

run_coordinator:
  module: dagster.core.run_coordinator
  class: QueuedRunCoordinator
  config:
    # Cap on the total number of runs in progress at once.
    max_concurrent_runs: 6
    # getting tags by copying from UI
    # Each entry caps concurrent runs carrying the given tag key/value pair.
    tag_concurrency_limits:
      - key: "ingest"
        value: "docker"
        limit: 3
      - key: "ingest"
        value: "report"
        limit: 2
      - key: "tenant_load"
        value: "graph"
        limit: 1

telemetry:
  enabled: false
8 changes: 8 additions & 0 deletions dagster/implnets/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@
.SHELLFLAGS += -e
VERSION :=`cat VERSION`

# ---- workflows ----
# no code generation is needed for workflows

# Neither target produces a file with its own name, so mark both as phony;
# otherwise a stray file called wf-build or wf-push would stop them running.
.PHONY: wf-build wf-push

# Build the workflows image with podman from ./build/Dockerfile_workflows,
# tagged with the version read from the VERSION file.
wf-build:
	podman build --tag="docker.io/fils/dagster_wf:$(VERSION)" --build-arg implnet=eco --file=./build/Dockerfile_workflows .

# Push the image tagged by wf-build to docker.io.
wf-push:
	podman push docker.io/fils/dagster_wf:$(VERSION)

# ---- ECO ----

Expand Down
2 changes: 1 addition & 1 deletion dagster/implnets/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.67
0.1.0
Empty file added dagster/implnets/__init__.py
Empty file.
3 changes: 2 additions & 1 deletion dagster/implnets/build/Dockerfile_code
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ RUN mkdir -p /usr/src/app/workflows

RUN pip install --upgrade pip
## this is a base for the project. Build this 'layer' first
COPY ./requirements_code.txt requirements.txt
COPY ./requirements.txt requirements.txt
RUN pip install -r requirements.txt

# this add the code
# this is only needed because we generate the code with pygen. otherwise added in compose-project.yaml docker compose
COPY . scheduler
COPY ./configs/${implnet}/gleanerconfig.yaml scheduler/gleanerconfig.yaml

Expand Down
2 changes: 1 addition & 1 deletion dagster/implnets/build/Dockerfile_local
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ WORKDIR /usr/src/app
ENV DAGSTER_HOME=/usr/src/app


CMD ["dagster-webserver", "-w", "./project/${implnet}/workspace.yaml", "-h", "0.0.0.0", "-p", "3000"]
CMD [ "dagster", "api","grpc", "-h", "0.0.0.0", "-p", "4000", "-m", "workflows.tasks.tasks", "-d", "/usr/src/app/"]
43 changes: 43 additions & 0 deletions dagster/implnets/build/Dockerfile_workflows
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
FROM python:3.11-slim


# This file no longer needs to generate code. It will just include the base.
# It will run ingest by default.

# git: we may want to get an unreleased version of code, so this is needed.
# gcc / musl-dev / python3-dev: presumably required to build native
# extensions pulled in by requirements.txt - TODO confirm.
# `apt-get update` and `apt-get install` share a single RUN so a cached layer
# with a stale package index is never reused; the index is deleted afterwards
# because it is not needed at runtime.
RUN apt-get update \
    && apt-get install -y git gcc musl-dev python3-dev \
    && rm -rf /var/lib/apt/lists/*
#RUN apt-get install libffi-dev
RUN pip install --no-cache-dir --upgrade pip

# Read the ARG implnet to set who to build for.
# It is currently unused (the COPY that referenced it is commented out below)
# but is kept so existing `--build-arg implnet=...` invocations keep working.

# docker buildandpush pulls the repo, so we need to put the code at a different location
# this fails because the dagster/implnets files are not in the docker
ARG implnet=eco

RUN mkdir -p /usr/src/app/workflows


## this is a base for the project. Build this 'layer' first
# Copying requirements.txt on its own lets the dependency layer stay cached
# when only the source code changes.
COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# this adds the code
COPY . scheduler
#COPY ./configs/${implnet}/gleanerconfig.yaml scheduler/gleanerconfig.yaml

# Instance configuration, placed in the directory DAGSTER_HOME points at.
COPY ./deployment/dagster.yaml /usr/src/app/

WORKDIR scheduler


# Absolute destination: this is the directory passed to `-d` in CMD.
COPY ./workflows/ /usr/src/app/workflows



# Change working directory
WORKDIR /usr/src/app
ENV DAGSTER_HOME=/usr/src/app


# Start the Dagster gRPC API on port 4000, loading module workflows.tasks.tasks
# with /usr/src/app/ as the working directory.
CMD [ "dagster", "api","grpc", "-h", "0.0.0.0", "-p", "4000", "-m", "workflows.tasks.tasks", "-d", "/usr/src/app/"]
Empty file.
Loading

0 comments on commit 006ef62

Please sign in to comment.