From 653d01c70ad4b3e96d17e2ea956479a6e41c0c98 Mon Sep 17 00:00:00 2001 From: knockknockyoo Date: Mon, 9 Mar 2026 10:48:27 -0400 Subject: [PATCH 1/4] Add the automated stage/prod data promotion with check-promotion-upper. --- ...uto_data_promotion_c1_qa_to_stage_prod.yml | 424 ++++++++++++++++++ config/sync_status.yml | 1 + prefect_c1_prod.yaml | 12 + 3 files changed, 437 insertions(+) create mode 100644 .github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml diff --git a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml new file mode 100644 index 0000000..e18b21a --- /dev/null +++ b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml @@ -0,0 +1,424 @@ +name: Data Promotion (CloudOne QA to Stage and Production) + +permissions: + contents: write + +on: + schedule: + - cron: "0 8 * * 6" # runs weekly on Saturday at 3:00 AM EST (8:00 UTC) + workflow_dispatch: + inputs: + dry_run: + description: "Export s3 bucket only" + required: false + type: boolean + default: false + +jobs: + detect-updated-models: + runs-on: ubuntu-latest + outputs: + model_filters: ${{ steps.detect.outputs.model_filters }} + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + + # Uses last_promoted_sha_stage_prod from config/sync_status.yml (SHA of last commit that changed mdb_models.yml). + - name: Detect updated models since last promotion + id: detect + run: | + set -euo pipefail + ref=$(grep 'last_promoted_sha_stage_prod:' config/sync_status.yml | sed -E "s/.*last_promoted_sha_stage_prod: *['\"]?([a-f0-9]+)['\"]?.*/\1/" | tr -d ' \r' || true) + if [ -z "$ref" ]; then + echo "model_filters=" >> "$GITHUB_OUTPUT" + echo "No updated models detected (no last_promoted_sha_stage_prod)." + exit 0 + fi + # Parse git diff: model is "updated" if latest_version changed (prerelease fields are ignored). 
+ current="" + updated="" + while IFS= read -r line; do + if [[ "$line" =~ ^@@.*@@[[:space:]]+([A-Za-z0-9_-]+): ]]; then + current="${BASH_REMATCH[1]}" + fi + if [[ -n "$current" ]] && [[ "$line" =~ ^\+[[:space:]]+latest_version: ]]; then + updated="${updated:+$updated }$current" + current="" + fi + done < <(git diff "$ref..HEAD" -- config/mdb_models.yml 2>/dev/null || true) + if [ -z "$updated" ]; then + echo "model_filters=" >> "$GITHUB_OUTPUT" + echo "No updated models detected." + else + # Build JSON array for Prefect (e.g. ["CDS","CTDC"]). + arr=$(echo "$updated" | tr ' ' '\n' | sort -u | jq -R -s -c 'split("\n") | map(select(length > 0))') + echo "model_filters=$arr" >> "$GITHUB_OUTPUT" + echo "Updated models: $updated" + fi + + check-qa-ready: + needs: detect-updated-models + if: ${{ needs.detect-updated-models.outputs.model_filters != '' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Check 0 — Confirm QA is in sync with MDF and DEV + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + # Check 1: MDF vs QA (check_model_qa) - ensures QA has all models from MDF + # Check 2: DEV vs QA (check_model_sync) - ensures QA is in sync with DEV + # Using stage="post" performs both checks + PARAMS=$(jq -n \ + --argjson filters '${{ needs.detect-updated-models.outputs.model_filters }}' \ + '{ + "stage": "post", + "dev_mdb_id": "cloud-one-mdb-dev", + "qa_mdb_id": "cloud-one-mdb-qa", + "models_filter": $filters + }') + prefect deployment run 'check-promotion/check-promotion-upper' \ + --params "$PARAMS" \ + --watch + + export-c1-qa: + needs: check-qa-ready + runs-on: ubuntu-latest + outputs: + s3_key: ${{ steps.export.outputs.s3_key }} + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Export cloud-one-mdb-qa to S3 + id: export + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + + PARAMS=$(jq -n \ + '{ + "mdb_id": "cloud-one-mdb-qa", + "bucket": "cloudone-mdb-data" + }') + + echo "Running mdb-export-s3 with params: $PARAMS" + + watch_out=$( + prefect deployment run mdb-export-s3/mdb-export-s3 \ + --params "$PARAMS" \ + --watch \ + --watch-interval 30 2>&1 | tee export_watch.log + ) + + RUN_ID=$( + grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" export_watch.log \ + | head -n1 || true + ) + echo "Export flow run: $RUN_ID" + + TODAY=$(TZ="America/New_York" date +"%Y-%m-%d") + S3_KEY="${TODAY}__cloud-one-mdb-qa.graphml" + + echo "s3_key=$S3_KEY" >> $GITHUB_OUTPUT + echo "Generated S3 key: $S3_KEY" + + import-to-c1-stage: + needs: export-c1-qa + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') }} + runs-on: ubuntu-latest + outputs: + s3_key: ${{ needs.export-c1-qa.outputs.s3_key }} + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Import to cloud-one-mdb-stage from S3 + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + + S3_KEY="${{ needs.export-c1-qa.outputs.s3_key }}" + + PARAMS=$(jq -n \ + --arg key "$S3_KEY" \ + '{ + "mdb_id": "cloud-one-mdb-stage", + "bucket": "cloudone-mdb-data", + "key": $key, + "clear_db": true + }') + + echo "Running mdb-import-s3 with params: $PARAMS" + + watch_out=$( + prefect deployment run mdb-import-s3/mdb-import-s3 \ + --params "$PARAMS" \ + --watch \ + --watch-interval 30 2>&1 | tee import_watch.log + ) + + RUN_ID=$( + grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" import_watch.log \ + | head -n1 || true + ) + echo "Import flow run: $RUN_ID" + + verify-stage-promotion: + needs: [import-to-c1-stage, detect-updated-models] + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') && needs.detect-updated-models.outputs.model_filters != '' }} + runs-on: ubuntu-latest + outputs: + s3_key: ${{ needs.import-to-c1-stage.outputs.s3_key }} + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + ref: ${{ github.ref }} + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Check Stage — Confirm Stage received all promoted models (QA vs Stage) + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + # This checks MDF vs Stage (Check 1) and QA vs Stage (Check 2) + # Important: Both QA and Stage have the same data from QA export + # API filters out prerelease versions, so both should be in sync + PARAMS=$(jq -n \ + --argjson filters '${{ needs.detect-updated-models.outputs.model_filters }}' \ + '{ + "stage": "post", + "dev_mdb_id": "cloud-one-mdb-qa", + "qa_mdb_id": "cloud-one-mdb-stage", + "models_filter": $filters + }') + prefect deployment run 'check-promotion/check-promotion-upper' \ + --params "$PARAMS" \ + --watch + + import-to-c1-prod: + needs: [verify-stage-promotion] + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') && needs.verify-stage-promotion.result == 'success' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Import to cloud-one-mdb-prod from S3 + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + + S3_KEY="${{ needs.verify-stage-promotion.outputs.s3_key }}" + # Note: Using QA export key from verify-stage-promotion; no separate Stage export/prune step in this workflow + + PARAMS=$(jq -n \ + --arg key "$S3_KEY" \ + '{ + "mdb_id": "cloud-one-mdb-prod", + "bucket": "cloudone-mdb-data", + "key": $key, + "clear_db": true + }') + + echo "Running mdb-import-s3 with params: $PARAMS" + + watch_out=$( + prefect deployment run mdb-import-s3/mdb-import-s3 \ + --params "$PARAMS" \ + --watch \ + --watch-interval 30 2>&1 | tee import_watch.log + ) + + RUN_ID=$( + grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" import_watch.log \ + | head -n1 || true + ) + echo "Import flow run: $RUN_ID" + + verify-prod-promotion: + needs: [import-to-c1-prod, detect-updated-models] + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') && needs.detect-updated-models.outputs.model_filters != '' }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + with: + fetch-depth: 0 + ref: ${{ github.ref }} + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Check Production — Confirm Production received all promoted models (MDF vs Prod, QA vs Prod) + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + # This checks MDF vs Prod and QA vs Prod + PARAMS=$(jq -n \ + --argjson filters '${{ needs.detect-updated-models.outputs.model_filters }}' \ + '{ + "stage": "post", + "dev_mdb_id": "cloud-one-mdb-qa", + "qa_mdb_id": "cloud-one-mdb-prod", + "models_filter": $filters + }') + prefect deployment run 'check-promotion/check-promotion-upper' \ + --params "$PARAMS" \ + --watch + + - name: Check Sync — Confirm Stage and Production are in sync (Stage vs Prod) + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + # This checks MDF vs Prod and Stage vs Prod (using Stage as dev_mdb_id and Prod as qa_mdb_id) + PARAMS=$(jq -n \ + --argjson filters '${{ needs.detect-updated-models.outputs.model_filters }}' \ + '{ + "stage": "post", + "dev_mdb_id": "cloud-one-mdb-stage", + "qa_mdb_id": "cloud-one-mdb-prod", + "models_filter": $filters + }') + prefect deployment run 'check-promotion/check-promotion-upper' \ + --params "$PARAMS" \ + --watch + + # last_promoted_sha_stage_prod = SHA of the commit that last changed config/mdb_models.yml. + # Used by detect-updated to find models whose latest_version changed since then. 
+ - name: Update last_promoted_sha_stage_prod in sync_status.yml + run: | + set -xeuo pipefail + export NEW_SHA=$(git log -1 --format=%H -- config/mdb_models.yml) + git config user.name "GitHub Actions Bot" + git config user.email "actions@github.com" + python - << 'PY' + import yaml, pathlib, os + p = pathlib.Path('config/sync_status.yml') + d = yaml.safe_load(p.read_text()) + d.setdefault('promotion', {})['last_promoted_sha_stage_prod'] = os.environ['NEW_SHA'] + p.write_text(yaml.dump(d, default_flow_style=False, sort_keys=False)) + PY + git add config/sync_status.yml + git diff --cached --quiet || git commit -m "chore: update last_promoted_sha_stage_prod to ${NEW_SHA}" + git push origin ${{ github.ref_name }} + + slack-notification: + needs: [detect-updated-models, check-qa-ready, export-c1-qa, import-to-c1-stage, verify-stage-promotion, import-to-c1-prod, verify-prod-promotion] + runs-on: ubuntu-latest + if: always() + steps: + - name: Slack Notification + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} + uses: act10ns/slack@87c73aef9f8838eb6feae81589a6b1487a4a9e08 # v1.6.0 + with: + status: ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' || needs.import-to-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'failure' || 'success' }} + message: | + + ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' || needs.import-to-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'Failure' || 'Success' }} diff --git a/config/sync_status.yml b/config/sync_status.yml index a4e4114..f3df796 100644 --- a/config/sync_status.yml +++ 
b/config/sync_status.yml @@ -6,3 +6,4 @@ caDSR: date_format: '%Y-%m-%d' promotion: last_promoted_sha: bec3959cb8ea66f80f5aba5ca305a50c95cbb85a + last_promoted_sha_stage_prod: 9deffc2e15e80f93ca470cf4333a32d210211921 diff --git a/prefect_c1_prod.yaml b/prefect_c1_prod.yaml index 3dbebb6..8f46853 100644 --- a/prefect_c1_prod.yaml +++ b/prefect_c1_prod.yaml @@ -166,6 +166,18 @@ deployments: entrypoint: src/bento_mdb/flows/update_c1_upper.py:update_c1_upper_flow parameters: {} schedule: null + work_pool: + name: mdb-8gb-prefect-prod-2.20.3-python3.9 + work_queue_name: + job_variables: {} +- name: check-promotion-upper + version: + tags: [] + concurrency_limit: + description: "Verify model promotion across environments: pre-promotion checks (QA vs MDF/DEV), post-promotion checks (Stage vs QA/MDF, Prod vs QA/MDF, Stage vs Prod)." + entrypoint: src/bento_mdb/flows/check_promotion.py:check_promotion_flow + parameters: {} + schedule: null work_pool: name: mdb-8gb-prefect-prod-2.20.3-python3.9 work_queue_name: From 94b2b3662bed5d507213aff3d162d5d2d0aabd1e Mon Sep 17 00:00:00 2001 From: knockknockyoo Date: Mon, 9 Mar 2026 11:01:25 -0400 Subject: [PATCH 2/4] fix: correct YAML indentation in import-to-c1-prod job Made-with: Cursor --- .../auto_data_promotion_c1_qa_to_stage_prod.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml index e18b21a..ba3d900 100644 --- a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml +++ b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml @@ -319,11 +319,11 @@ jobs: --watch-interval 30 2>&1 | tee import_watch.log ) - RUN_ID=$( - grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" import_watch.log \ - | head -n1 || true - ) - echo "Import flow run: $RUN_ID" + RUN_ID=$( + grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" import_watch.log \ + | head -n1 || true + ) + echo "Import flow run: $RUN_ID" 
verify-prod-promotion: needs: [import-to-c1-prod, detect-updated-models] From 5e333537b95ff57fc5938ae3387c6a1172d19125 Mon Sep 17 00:00:00 2001 From: knockknockyoo Date: Wed, 11 Mar 2026 17:32:45 -0400 Subject: [PATCH 3/4] 1. updated the scheduler time - dev/qa: 3AM daily - stage/prod: 12pm Saturday 2. Updated the log for the stage/prod tier. --- .github/workflows/auto_data_promotion.yml | 2 +- ...uto_data_promotion_c1_qa_to_stage_prod.yml | 110 ++++++++++++++++-- src/bento_mdb/flows/check_promotion.py | 65 ++++++----- 3 files changed, 141 insertions(+), 36 deletions(-) diff --git a/.github/workflows/auto_data_promotion.yml b/.github/workflows/auto_data_promotion.yml index 2c0106a..169146e 100644 --- a/.github/workflows/auto_data_promotion.yml +++ b/.github/workflows/auto_data_promotion.yml @@ -5,7 +5,7 @@ permissions: on: schedule: - - cron: "0 6 * * *" # runs daily at 1:00 AM EST + - cron: "0 8 * * *" # runs daily at 3:00 AM EST workflow_dispatch: inputs: dry_run: diff --git a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml index ba3d900..cb123bf 100644 --- a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml +++ b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml @@ -5,7 +5,7 @@ permissions: on: schedule: - - cron: "0 8 * * 6" # runs weekly on Saturday at 3:00 AM EST (8:00 UTC) + - cron: "0 17 * * 6" # runs weekly on Saturday at 12:00 PM EST (17:00 UTC) workflow_dispatch: inputs: dry_run: @@ -219,13 +219,105 @@ jobs: ) echo "Import flow run: $RUN_ID" + prune-c1-stage: + needs: import-to-c1-stage + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') }} + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: 
astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . + uv sync --all-extras --dev + + - name: Prune prerelease from cloud-one-mdb-stage + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + PARAMS=$(jq -n '{"mdb_id": "cloud-one-mdb-stage", "dry_run": false}') + echo "Running mdb-prune-prerelease with params: $PARAMS" + prefect deployment run mdb-prune-prerelease/prune-prerelease-prod \ + --params "$PARAMS" \ + --watch + + export-c1-stage: + needs: prune-c1-stage + if: ${{ !fromJson(github.event.inputs.dry_run || 'false') }} + runs-on: ubuntu-latest + outputs: + s3_key: ${{ steps.export.outputs.s3_key }} + steps: + - name: Checkout repository + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 + + - name: Set up Python + uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c + with: + python-version-file: ".python-version" + + - name: Install uv + uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 + with: + version: "0.5.10" + + - name: Install dependencies and project + run: | + uv pip install --system -e . 
+ uv sync --all-extras --dev + + - name: Export cloud-one-mdb-stage to S3 + id: export + env: + PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }} + PREFECT_API_URL: ${{ secrets.PREFECT_API_URL_C1 }} + run: | + set -xeuo pipefail + PARAMS=$(jq -n \ + '{ + "mdb_id": "cloud-one-mdb-stage", + "bucket": "cloudone-mdb-data" + }') + echo "Running mdb-export-s3 (stage) with params: $PARAMS" + watch_out=$( + prefect deployment run mdb-export-s3/mdb-export-s3 \ + --params "$PARAMS" \ + --watch \ + --watch-interval 30 2>&1 | tee export_watch.log + ) + RUN_ID=$( + grep -oP "(?<=UUID:\s)[0-9a-fA-F-]+" export_watch.log \ + | head -n1 || true + ) + echo "Export flow run: $RUN_ID" + TODAY=$(TZ="America/New_York" date +"%Y-%m-%d") + S3_KEY="${TODAY}__cloud-one-mdb-stage.graphml" + echo "s3_key=$S3_KEY" >> $GITHUB_OUTPUT + echo "Generated S3 key: $S3_KEY" + verify-stage-promotion: - needs: [import-to-c1-stage, detect-updated-models] + needs: [import-to-c1-stage, export-c1-stage, detect-updated-models] if: ${{ !fromJson(github.event.inputs.dry_run || 'false') && needs.detect-updated-models.outputs.model_filters != '' }} runs-on: ubuntu-latest outputs: - s3_key: ${{ needs.import-to-c1-stage.outputs.s3_key }} + s3_key: ${{ needs.export-c1-stage.outputs.s3_key }} steps: + - name: Output Stage S3 key + run: echo "export-c1-stage s3_key=${{ needs.export-c1-stage.outputs.s3_key }}" + - name: Checkout repository uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 with: @@ -269,7 +361,7 @@ jobs: --watch import-to-c1-prod: - needs: [verify-stage-promotion] + needs: [export-c1-stage, verify-stage-promotion] if: ${{ !fromJson(github.event.inputs.dry_run || 'false') && needs.verify-stage-promotion.result == 'success' }} runs-on: ubuntu-latest steps: @@ -298,8 +390,8 @@ jobs: run: | set -xeuo pipefail - S3_KEY="${{ needs.verify-stage-promotion.outputs.s3_key }}" - # Note: Using QA export key from verify-stage-promotion; no separate Stage export/prune step in this workflow + # Use Stage 
export key directly from export-c1-stage (pruned data) + S3_KEY="${{ needs.export-c1-stage.outputs.s3_key }}" PARAMS=$(jq -n \ --arg key "$S3_KEY" \ @@ -310,7 +402,7 @@ jobs: "clear_db": true }') - echo "Running mdb-import-s3 with params: $PARAMS" + echo "Running mdb-import-s3 with params: $PARAMS (key=$S3_KEY)" watch_out=$( prefect deployment run mdb-import-s3/mdb-import-s3 \ @@ -409,7 +501,7 @@ jobs: git push origin ${{ github.ref_name }} slack-notification: - needs: [detect-updated-models, check-qa-ready, export-c1-qa, import-to-c1-stage, verify-stage-promotion, import-to-c1-prod, verify-prod-promotion] + needs: [detect-updated-models, check-qa-ready, export-c1-qa, import-to-c1-stage, prune-c1-stage, export-c1-stage, verify-stage-promotion, import-to-c1-prod, verify-prod-promotion] runs-on: ubuntu-latest if: always() steps: @@ -418,7 +510,7 @@ jobs: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} uses: act10ns/slack@87c73aef9f8838eb6feae81589a6b1487a4a9e08 # v1.6.0 with: - status: ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' || needs.import-to-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'failure' || 'success' }} + status: ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' || needs.import-to-c1-stage.result == 'failure' || needs.prune-c1-stage.result == 'failure' || needs.export-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'failure' || 'success' }} message: | - ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' ||
needs.import-to-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'Failure' || 'Success' }} + ${{ (needs.detect-updated-models.result == 'failure' || needs.check-qa-ready.result == 'failure' || needs.export-c1-qa.result == 'failure' || needs.import-to-c1-stage.result == 'failure' || needs.prune-c1-stage.result == 'failure' || needs.export-c1-stage.result == 'failure' || needs.verify-stage-promotion.result == 'failure' || needs.import-to-c1-prod.result == 'failure' || needs.verify-prod-promotion.result == 'failure') && 'Failure' || 'Success' }} diff --git a/src/bento_mdb/flows/check_promotion.py b/src/bento_mdb/flows/check_promotion.py index e39b2e2..5ab2c0f 100644 --- a/src/bento_mdb/flows/check_promotion.py +++ b/src/bento_mdb/flows/check_promotion.py @@ -1,15 +1,11 @@ """Promotion validation flow. -Check 0 — Confirm DEV is up to date before promotion starts -Compare MDF source files against MDB(DEV). Run before export (stage="pre"). -Check 1 — Confirm QA received all promoted models -Compare MDF vs MDB(QA). After promotion, if Expected inserts: 0, the data -promotion completed successfully and QA is fully in sync with MDF. -Run after import (stage="post"). +Check 0 (stage=pre): Confirm DEV is up to date before promotion — MDF vs MDB(DEV). + Used before export in DEV→QA promotion. -Check 2 — Check DEV DB and QA DB are in sync -Compare MDB-DEV vs MDB-QA. If Expected inserts: 0, DEV and QA are fully in sync. -Run after import (stage="post"). +Check 1 (stage=post): Confirm target received all promoted models — MDF vs MDB(qa_mdb_id). +Check 2 (stage=post): Check source and target are in sync — MDB(dev_mdb_id) vs MDB(qa_mdb_id). + dev_mdb_id/qa_mdb_id can be any pair (e.g. DEV/QA, QA/Stage, Stage/Prod). models_filter is passed from the workflow (YAML detect step). When None, all models from config/mdb_models.yml are used. @@ -35,6 +31,16 @@ _MDB_MODELS_PATH = _REPO_ROOT / "config/mdb_models.yml" +def _mdb_label(mdb_id: str) -> str: + """Return a short label for logging (e.g. cloud-one-mdb-stage -> STAGE).""" + if not mdb_id: + return "?"
+ suffix = mdb_id.split("-")[-1].lower() + if suffix in ("dev", "qa", "stage", "prod"): + return suffix.upper() + return mdb_id + + # ── shared helpers ───────────────────────────────────────────────────────────── def _connect(mdb_id: str) -> MDB: @@ -173,20 +179,21 @@ def check_model_dev(model: str, spec: dict, mdb_id: str) -> _DiffResult: @task(name="check-model-qa") def check_model_qa(model: str, spec: dict, mdb_id: str) -> _DiffResult: - """Check 1: Confirm QA received all promoted models — compare MDF vs MDB(QA).""" + """Check 1: Confirm target received all promoted models — compare MDF vs MDB(target).""" logger = get_run_logger() version = spec["latest_version"] mdb_version = version.lstrip("v") - logger.info("=== Diff: %s v%s (MDF vs MDB-QA) ===", model, mdb_version) + target_label = _mdb_label(mdb_id) + logger.info("=== Diff: %s v%s (MDF vs MDB-%s) ===", model, mdb_version, target_label) mdb = _connect(mdb_id) try: mdf_nodes, mdf_rels, mdf_props = _load_mdf_handles(spec, model, version) qa_nodes, qa_rels, qa_props = _query_handles(mdb, model, mdb_version) - _log_diff(logger, "NODES", mdf_nodes, qa_nodes, "MDF", "MDB-QA") - _log_diff(logger, "RELATIONSHIPS", mdf_rels, qa_rels, "MDF", "MDB-QA") - _log_diff(logger, "PROPERTIES", mdf_props, qa_props, "MDF", "MDB-QA") + _log_diff(logger, "NODES", mdf_nodes, qa_nodes, "MDF", f"MDB-{target_label}") + _log_diff(logger, "RELATIONSHIPS", mdf_rels, qa_rels, "MDF", f"MDB-{target_label}") + _log_diff(logger, "PROPERTIES", mdf_props, qa_props, "MDF", f"MDB-{target_label}") inserts = (len(mdf_nodes - qa_nodes) + len(mdf_rels - qa_rels) + len(mdf_props - qa_props)) @@ -194,7 +201,7 @@ def check_model_qa(model: str, spec: dict, mdb_id: str) -> _DiffResult: + len(qa_props - mdf_props)) logger.info("Expected inserts=%d removals=%d", inserts, removals) if inserts == 0 and removals == 0: - logger.info("Expected inserts: 0; the data promotion completed successfully and QA is fully in sync with MDF.") + logger.info("Expected 
inserts: 0; %s is fully in sync with MDF.", target_label) return _DiffResult(model, mdb_version, inserts, removals) finally: mdb.close() @@ -202,11 +209,13 @@ def check_model_qa(model: str, spec: dict, mdb_id: str) -> _DiffResult: @task(name="check-model-sync") def check_model_sync(model: str, spec: dict, dev_mdb_id: str, qa_mdb_id: str) -> _DiffResult: - """Check 2: Check DEV DB and QA DB are in sync — compare MDB-DEV vs MDB-QA.""" + """Check 2: Check source and target DBs are in sync — compare dev_mdb vs qa_mdb.""" logger = get_run_logger() version = spec["latest_version"] mdb_version = version.lstrip("v") - logger.info("=== Diff: %s v%s (MDB-DEV vs MDB-QA) ===", model, mdb_version) + source_label = _mdb_label(dev_mdb_id) + target_label = _mdb_label(qa_mdb_id) + logger.info("=== Diff: %s v%s (MDB-%s vs MDB-%s) ===", model, mdb_version, source_label, target_label) mdb_dev = _connect(dev_mdb_id) mdb_qa = _connect(qa_mdb_id) @@ -214,9 +223,9 @@ def check_model_sync(model: str, spec: dict, dev_mdb_id: str, qa_mdb_id: str) -> dev_nodes, dev_rels, dev_props = _query_handles(mdb_dev, model, mdb_version) qa_nodes, qa_rels, qa_props = _query_handles(mdb_qa, model, mdb_version) - _log_diff(logger, "NODES", dev_nodes, qa_nodes, "DEV", "QA") - _log_diff(logger, "RELATIONSHIPS", dev_rels, qa_rels, "DEV", "QA") - _log_diff(logger, "PROPERTIES", dev_props, qa_props, "DEV", "QA") + _log_diff(logger, "NODES", dev_nodes, qa_nodes, source_label, target_label) + _log_diff(logger, "RELATIONSHIPS", dev_rels, qa_rels, source_label, target_label) + _log_diff(logger, "PROPERTIES", dev_props, qa_props, source_label, target_label) inserts = (len(dev_nodes - qa_nodes) + len(dev_rels - qa_rels) + len(dev_props - qa_props)) @@ -224,7 +233,7 @@ def check_model_sync(model: str, spec: dict, dev_mdb_id: str, qa_mdb_id: str) -> + len(qa_props - dev_props)) logger.info("Expected inserts=%d removals=%d", inserts, removals) if inserts == 0 and removals == 0: - logger.info("Expected inserts: 0; DEV 
and QA are fully in sync.") + logger.info("Expected inserts: 0; %s and %s are fully in sync.", source_label, target_label) return _DiffResult(model, mdb_version, inserts, removals) finally: mdb_dev.close() @@ -243,8 +252,9 @@ def check_promotion_flow( """Promotion validation flow. Check 0 (stage=pre): Confirm DEV is up to date — MDF vs MDB(DEV). Run before export. - Check 1 (stage=post): Confirm QA received all promoted models — MDF vs MDB(QA). - Check 2 (stage=post): Check DEV and QA are in sync — MDB-DEV vs MDB-QA. + Check 1 (stage=post): Confirm target received all promoted models — MDF vs MDB(qa_mdb_id). + Check 2 (stage=post): Check source and target in sync — MDB(dev_mdb_id) vs MDB(qa_mdb_id). + dev_mdb_id/qa_mdb_id can be e.g. DEV/QA, QA/Stage, Stage/Prod. models_filter is passed from the workflow (detect step in YAML). When None, all models are used. """ @@ -268,15 +278,18 @@ def check_promotion_flow( logger.info("Check 0 PASSED — DEV is up to date with MDF.") elif stage == "post": + source_label = _mdb_label(dev_mdb_id) + target_label = _mdb_label(qa_mdb_id) + logger.info("=" * 60) - logger.info("Check 1 — Confirm QA received all promoted models (MDF vs MDB-QA)") + logger.info("Check 1 — Confirm %s received all promoted models (MDF vs MDB-%s)", target_label, target_label) logger.info("=" * 60) qa_results = [check_model_qa(model, spec, qa_mdb_id) for model, spec in specs.items()] _log_summary(logger, qa_results) logger.info("=" * 60) - logger.info("Check 2 — Check DEV DB and QA DB are in sync (MDB-DEV vs MDB-QA)") + logger.info("Check 2 — Check %s and %s are in sync (MDB-%s vs MDB-%s)", source_label, target_label, source_label, target_label) logger.info("=" * 60) sync_results = [ @@ -291,7 +304,7 @@ def check_promotion_flow( f"Post-promotion checks FAILED — {len(failed)} check(s) did not pass: " + ", ".join(f"{r.model}(v{r.version})" for r in failed) ) - logger.info("Post-promotion checks PASSED — QA received all promoted models and DEV/QA are in sync.") + 
logger.info("Post-promotion checks PASSED — %s received all promoted models and %s/%s are in sync.", target_label, source_label, target_label) else: raise ValueError(f"Unknown stage: {stage!r}. Must be 'pre' or 'post'.") From e7c630beba0683a761f0b8546ababbf87fe1a986 Mon Sep 17 00:00:00 2001 From: knockknockyoo Date: Thu, 12 Mar 2026 09:50:38 -0400 Subject: [PATCH 4/4] use the prod prefect deployment export-s3-prod --- .../auto_data_promotion_c1_qa_to_stage_prod.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml index cb123bf..cbf2107 100644 --- a/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml +++ b/.github/workflows/auto_data_promotion_c1_qa_to_stage_prod.yml @@ -143,7 +143,7 @@ jobs: echo "Running mdb-export-s3 with params: $PARAMS" watch_out=$( - prefect deployment run mdb-export-s3/mdb-export-s3 \ + prefect deployment run mdb-export-s3/mdb-export-s3-prod \ --params "$PARAMS" \ --watch \ --watch-interval 30 2>&1 | tee export_watch.log @@ -207,7 +207,7 @@ jobs: echo "Running mdb-import-s3 with params: $PARAMS" watch_out=$( - prefect deployment run mdb-import-s3/mdb-import-s3 \ + prefect deployment run mdb-import-s3/mdb-import-s3-prod \ --params "$PARAMS" \ --watch \ --watch-interval 30 2>&1 | tee import_watch.log @@ -293,7 +293,7 @@ jobs: }') echo "Running mdb-export-s3 (stage) with params: $PARAMS" watch_out=$( - prefect deployment run mdb-export-s3/mdb-export-s3 \ + prefect deployment run mdb-export-s3/mdb-export-s3-prod \ --params "$PARAMS" \ --watch \ --watch-interval 30 2>&1 | tee export_watch.log @@ -315,9 +315,6 @@ jobs: outputs: s3_key: ${{ needs.export-c1-stage.outputs.s3_key }} steps: - - name: Output Stage S3 key - run: echo "export-c1-stage s3_key=${{ needs.export-c1-stage.outputs.s3_key }}" - - name: Checkout repository uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 
with: @@ -405,7 +402,7 @@ jobs: echo "Running mdb-import-s3 with params: $PARAMS (key=$S3_KEY)" watch_out=$( - prefect deployment run mdb-import-s3/mdb-import-s3 \ + prefect deployment run mdb-import-s3/mdb-import-s3-prod \ --params "$PARAMS" \ --watch \ --watch-interval 30 2>&1 | tee import_watch.log