diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 320d9c1..d8ec806 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -19,9 +19,8 @@ jobs: fail-fast: false matrix: workshop-dir: ["scipy-2023"] - python-version: ["3.9"] - # python-version: ["3.8", "3.9", "3.10", "3.11"] - # os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.8", "3.9", "3.10", "3.11"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] env: @@ -47,7 +46,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r requirements.txt flytekitplugins-envd + pip install --force-reinstall git+https://github.com/flyteorg/flytekit@3006032 pip install pytest pytest-xdist - name: Pip info @@ -62,12 +62,23 @@ jobs: - name: Setup Flyte Sandbox run: | flytectl demo start - sleep 3 - flytectl --config /home/runner/.flyte/config-sandbox.yaml update task-resource-attribute --attrFile cra.yaml - pyflyte register --image ghcr.io/flyteorg/flyte-conference-talks:scipy-2023-latest workflows - - - name: Last Test - run: echo 'done' - - # - name: Integration Tests - # run: pytest tests/integration + + - name: Update Resources + uses: nick-fields/retry@v2 + with: + timeout_minutes: 10 + retry_wait_seconds: 15 + max_attempts: 10 + shell: pwsh + command: cd ${{ matrix.workshop-dir }} && flytectl --config /home/runner/.flyte/config-sandbox.yaml update task-resource-attribute --attrFile cra.yaml + + - name: Register Workflows + env: + FLYTECTL_CONFIG: /home/runner/.flyte/config-sandbox.yaml + uses: nick-fields/retry@v2 + with: + timeout_minutes: 10 + retry_wait_seconds: 15 + max_attempts: 10 + shell: pwsh + command: cd ${{ matrix.workshop-dir }} && pyflyte register --image ghcr.io/flyteorg/flyte-conference-talks:scipy-2023-ci workflows diff --git a/scipy-2023/Dockerfile b/scipy-2023/Dockerfile index 8a83f9d..9a4a2ce 100644 --- a/scipy-2023/Dockerfile +++ 
b/scipy-2023/Dockerfile @@ -18,7 +18,7 @@ ENV PATH="${VENV}/bin:$PATH" # Install Python dependencies COPY ./requirements.txt /root RUN pip install -r /root/requirements.txt -RUN pip install --force-reinstall git+https://github.com/flyteorg/flytekit@e7afdab8b +RUN pip install --force-reinstall git+https://github.com/flyteorg/flytekit@3006032 # Copy the code COPY . /root diff --git a/scipy-2023/Dockerfile.ci b/scipy-2023/Dockerfile.ci new file mode 100644 index 0000000..c058bf2 --- /dev/null +++ b/scipy-2023/Dockerfile.ci @@ -0,0 +1,24 @@ +# 🐳📦 Docker containers unlock OS-level reproducibility 🔄 +FROM python:3.10-slim-buster + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +RUN apt-get update && apt-get install -y git build-essential + +ENV VENV /opt/venv + +# Virtual environment +RUN python3 -m venv ${VENV} +ENV PATH="${VENV}/bin:$PATH" + +# Install Python dependencies +COPY ./requirements-ci.txt /root +RUN pip install -r /root/requirements-ci.txt + +# Copy the code +COPY . /root + diff --git a/scipy-2023/Makefile b/scipy-2023/Makefile index 0804371..475e0a1 100644 --- a/scipy-2023/Makefile +++ b/scipy-2023/Makefile @@ -1,4 +1,11 @@ VERSION=$(shell git rev-parse HEAD | cut -c1-7) -docker-build-push: - bash ./docker_build_push.sh -r ghcr.io/flyteorg -a flyte-conference-talks -v scipy-2023-${VERSION} - bash ./docker_build_push.sh -r ghcr.io/flyteorg -a flyte-conference-talks -v scipy-2023-latest + +docker-build: + docker build --push --tag ghcr.io/flyteorg/flyte-conference-talks:scipy-2023-${VERSION} . + docker build --push --tag ghcr.io/flyteorg/flyte-conference-talks:scipy-2023-latest . + +ci-docker-build: + docker build --push . -f Dockerfile.ci -t ghcr.io/flyteorg/flyte-conference-talks:scipy-2023-ci + +sandbox-docker-build: + docker build --push . 
-f Dockerfile.ci -t localhost:30000/flyte-conference-talks:scipy-2023-ci diff --git a/scipy-2023/README.md b/scipy-2023/README.md index c3575e5..f009979 100644 --- a/scipy-2023/README.md +++ b/scipy-2023/README.md @@ -113,7 +113,7 @@ source ~/venvs/scipy-2023/bin/activate Install dependencies: ```bash -pip install -r requirements.txt +pip install -r requirements.txt flytekitplugins-envd pip install jupyter ipdb ``` diff --git a/scipy-2023/requirements-ci.txt b/scipy-2023/requirements-ci.txt new file mode 100644 index 0000000..f611f48 --- /dev/null +++ b/scipy-2023/requirements-ci.txt @@ -0,0 +1,7 @@ +dataclasses_json +flytekit @ git+https://github.com/flyteorg/flytekit@3006032 +grpcio==1.51.3 +palmerpenguins +pandas +scikit-learn +typing_extensions diff --git a/scipy-2023/requirements.txt b/scipy-2023/requirements.txt index 5ecfea3..32754a7 100644 --- a/scipy-2023/requirements.txt +++ b/scipy-2023/requirements.txt @@ -14,5 +14,6 @@ pandas pyspark<3.4.0 scikit-learn torch --index-url https://download.pytorch.org/whl/cpu +typing_extensions wheel whylogs diff --git a/scipy-2023/tests/integration/test_workflows.py b/scipy-2023/tests/integration/test_workflows.py index d126c96..ebc1e9a 100644 --- a/scipy-2023/tests/integration/test_workflows.py +++ b/scipy-2023/tests/integration/test_workflows.py @@ -8,6 +8,7 @@ import logging import os import time +from datetime import timedelta from flytekit.remote import FlyteRemote from flytekit.configuration import Config @@ -29,6 +30,15 @@ else: config = Config.auto(CONFIG_PATH) +# GitHub Actions exports CI=true, so int() would raise ValueError; compare text. +if os.environ.get("CI", "0").strip().lower() not in ("", "0", "false", "no", "off"): + workflow_cases = WORKFLOW_CASES[:1] + poll_interval = timedelta(seconds=90) +else: + workflow_cases = WORKFLOW_CASES + poll_interval = None + + remote = FlyteRemote( config=config, default_project="flytesnacks", @@ -36,19 +46,19 @@ ) -@pytest.mark.parametrize("wf_case", WORKFLOW_CASES) +@pytest.mark.parametrize("wf_case", workflow_cases) def test_workflow_remote(wf_case: WorkflowCase): - for _ in 
range(120): # bypass issue where multiple remote objects are authenticating at the # same time. try: flyte_wf = remote.fetch_workflow(name=wf_case.workflow.name) break - except OSError: - time.sleep(1) + except Exception: + time.sleep(5) execution = remote.execute(flyte_wf, inputs=wf_case.inputs) url = remote.generate_console_url(execution) logger.info(f"Running workflow {wf_case.workflow.name} at: {url}") - execution = remote.wait(execution) + execution = remote.wait(execution, poll_interval=poll_interval) assert execution.closure.phase == SUCCEED_STATUS diff --git a/scipy-2023/workflows/example_00_intro.py b/scipy-2023/workflows/example_00_intro.py index 1aefdd2..7f7f1bb 100644 --- a/scipy-2023/workflows/example_00_intro.py +++ b/scipy-2023/workflows/example_00_intro.py @@ -33,7 +33,6 @@ "body_mass_g", ] - @dataclass_json @dataclass class Hyperparameters: diff --git a/scipy-2023/workflows/example_03_plugins.py b/scipy-2023/workflows/example_03_plugins.py index 342c33b..d491954 100644 --- a/scipy-2023/workflows/example_03_plugins.py +++ b/scipy-2023/workflows/example_03_plugins.py @@ -2,7 +2,6 @@ import os from dataclasses import dataclass -from typing import Annotated import pandas as pd import pyspark.sql @@ -20,6 +19,11 @@ from workflows.example_00_intro import FEATURES, TARGET +try: + from typing import Annotated +except ImportError: + from typing_extensions import Annotated + @dataclass_json @dataclass diff --git a/scipy-2023/workflows/example_07_caching.py b/scipy-2023/workflows/example_07_caching.py index 7939ce5..8ee776a 100644 --- a/scipy-2023/workflows/example_07_caching.py +++ b/scipy-2023/workflows/example_07_caching.py @@ -1,7 +1,7 @@ """Recoverability: Caching for compute efficiency.""" from dataclasses import asdict -from typing import Annotated, List, Tuple +from typing import List, Tuple import numpy as np import pandas as pd @@ -12,6 +12,11 @@ from flytekit import task, workflow, dynamic, HashMethod, Resources +try: + from typing import Annotated 
+except ImportError: + from typing_extensions import Annotated + from workflows.example_06_reproducibility import ( get_data, diff --git a/scipy-2023/workflows/example_10_flyte_decks.py b/scipy-2023/workflows/example_10_flyte_decks.py index 7af3381..2698e74 100644 --- a/scipy-2023/workflows/example_10_flyte_decks.py +++ b/scipy-2023/workflows/example_10_flyte_decks.py @@ -1,20 +1,19 @@ """Auditability: Flyte Decks for Pipeline visibility.""" -from typing import Annotated - import pandas as pd from palmerpenguins import load_penguins +try: + from typing import Annotated +except ImportError: + from typing_extensions import Annotated + from workflows.example_00_intro import FEATURES, TARGET import whylogs as why from flytekit import task, workflow, Deck, Resources from flytekitplugins.deck import FrameProfilingRenderer -from flytekitplugins.whylogs.renderer import ( - WhylogsConstraintsRenderer, - WhylogsSummaryDriftRenderer, -) -from flytekitplugins.whylogs.schema import WhylogsDatasetProfileTransformer +from flytekitplugins.whylogs.renderer import WhylogsConstraintsRenderer from whylogs.core import DatasetProfileView from whylogs.core.constraints import ConstraintsBuilder from whylogs.core.constraints.factories import (