From 86ecbb8b1abcbe981a6dfe483a015fc54d8bd4d8 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 10:25:41 -0600 Subject: [PATCH 1/6] add load test with default values --- .github/workflows/load_test.yml | 31 +++++++++++++++++++++++++++++++ scripts/load_test.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 .github/workflows/load_test.yml create mode 100644 scripts/load_test.py diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml new file mode 100644 index 00000000..fe305292 --- /dev/null +++ b/.github/workflows/load_test.yml @@ -0,0 +1,31 @@ +name: Harvester 2.0 Load Test +on: + workflow_dispatch: + +jobs: + load-test: + name: H2.0 Load Test + runs-on: ubuntu-latest + steps: + - name: Setup python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Display Python version + run: python -c "import sys; print(sys.version)" + - name: Install Poetry + uses: snok/install-poetry@v1 + - name: + run: + poetry install + - name: Run Test + env: + SRC_TITLE: 'EPA ScienceHub' + SRC_URL: 'https://pasteur.epa.gov/metadata.json' + SRC_OWNER_ORG: '82b85475-f85d-404a-b95b-89d1a42e9f6b' + SRC_SOURCE_TYPE: 'datajson' + CKAN_URL: ${{secrets.CKAN_URL}} + CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN}} + run: | + python load_test.py + \ No newline at end of file diff --git a/scripts/load_test.py b/scripts/load_test.py new file mode 100644 index 00000000..696c58fa --- /dev/null +++ b/scripts/load_test.py @@ -0,0 +1,32 @@ + +import os +import sys + +sys.path.insert(1, "/".join(os.path.realpath(__file__).split("/")[0:-2])) + +from harvester.harvest import HarvestSource + +title = os.environ['SRC_TITLE'] +url = os.environ['SRC_URL'] +owner_org = os.environ['SRC_OWNER_ORG'] +source_type = os.environ['SRC_SOURCE_TYPE'] + +print(title) +print(url) +print(owner_org) +print(source_type) + +harvest_source = HarvestSource( + title, + url, + owner_org, + source_type +) + +if harvest_source.extract_type == "waf-collection": + continue +harvest_source.get_record_changes() +if harvest_source.no_harvest_resp is True: + continue +harvest_source.synchronize_records() +harvest_source.report() From 575e014d24c89ad29afb0094910f82a684669cb9 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 10:44:07 -0600 Subject: [PATCH 2/6] remove continue loops --- .github/workflows/load_test.yml | 2 +- scripts/load_test.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml index fe305292..8aef5694 100644 --- a/.github/workflows/load_test.yml +++ b/.github/workflows/load_test.yml @@ -27,5 +27,5 @@ jobs: CKAN_URL: ${{secrets.CKAN_URL}} CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN}} run: | - python load_test.py + poetry run python scripts/load_test.py \ No newline at end of file diff --git a/scripts/load_test.py b/scripts/load_test.py index 696c58fa..029422f4 100644 --- a/scripts/load_test.py +++ b/scripts/load_test.py @@ -23,10 +23,6 @@ source_type ) -if harvest_source.extract_type == "waf-collection": - continue harvest_source.get_record_changes() -if harvest_source.no_harvest_resp is True: - continue harvest_source.synchronize_records() harvest_source.report() From 6d6da9f3531095ddae1e5b0c7781ae3cce50128f Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 10:46:11 -0600 Subject: [PATCH 3/6] fix lint --- .github/workflows/load_test.yml | 1 - scripts/load_test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml index 8aef5694..3eb3657a 100644 --- a/.github/workflows/load_test.yml +++ b/.github/workflows/load_test.yml @@ -28,4 +28,3 @@ jobs: CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN}} run: | poetry run python scripts/load_test.py - \ No newline at end of file diff --git a/scripts/load_test.py b/scripts/load_test.py index 029422f4..dabda2ae 100644 --- a/scripts/load_test.py +++ b/scripts/load_test.py @@ -4,7 +4,7 @@ sys.path.insert(1, "/".join(os.path.realpath(__file__).split("/")[0:-2])) -from harvester.harvest import HarvestSource +from harvester.harvest import HarvestSource # noqa E402 title = os.environ['SRC_TITLE'] url = os.environ['SRC_URL'] From 685f585efed17203322929c99ae47e8944b5f7f3 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 16:13:00 -0600 Subject: [PATCH 4/6] updates to script --- .env | 2 +- .github/workflows/load_test.yml | 23 ++++++++++++++++++----- scripts/load_test.py | 13 +++++++------ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/.env b/.env index 74b8ffed..29e14bee 100644 --- a/.env +++ b/.env @@ -14,4 +14,4 @@ S3FILESTORE__AWS_ACCESS_KEY_ID=_placeholder S3FILESTORE__AWS_SECRET_ACCESS_KEY=_placeholder S3FILESTORE__SIGNATURE_VERSION=s3v4 -MDTRANSLATOR_URL=http://127.0.0.1:3000/translates \ No newline at end of file +MDTRANSLATOR_URL=http://127.0.0.1:3000/translates diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml index 3eb3657a..a384d470 100644 --- a/.github/workflows/load_test.yml +++ b/.github/workflows/load_test.yml @@ -1,4 +1,4 @@ -name: Harvester 2.0 Load Test +name: Load Test on: workflow_dispatch: @@ -7,6 +7,8 @@ jobs: name: H2.0 Load Test runs-on: ubuntu-latest steps: + - name: checkout + uses: actions/checkout@v3 - name: Setup python uses: actions/setup-python@v4 with: @@ -15,10 +17,20 @@ jobs: run: python -c "import sys; print(sys.version)" - name: Install Poetry uses: snok/install-poetry@v1 - - name: - run: - poetry install - - name: Run Test + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + - name: Run Load Test env: SRC_TITLE: 'EPA ScienceHub' SRC_URL: 'https://pasteur.epa.gov/metadata.json' @@ -27,4 +39,5 @@ jobs: CKAN_URL: ${{secrets.CKAN_URL}} CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN}} run: | + source .venv/bin/activate poetry run python scripts/load_test.py diff --git a/scripts/load_test.py b/scripts/load_test.py index dabda2ae..4dfc53e9 100644 --- a/scripts/load_test.py +++ b/scripts/load_test.py @@ -11,18 +11,19 @@ owner_org = os.environ['SRC_OWNER_ORG'] source_type = os.environ['SRC_SOURCE_TYPE'] -print(title) -print(url) -print(owner_org) -print(source_type) +print('Running load test for the following harvest config') +print(f'title: {title}') +print(f'url: {url}') +print(f'owner_org: {owner_org}') +print(f'source_type: {source_type}') harvest_source = HarvestSource( title, url, owner_org, source_type -) +) -harvest_source.get_record_changes() +harvest_source.get_record_changes() harvest_source.synchronize_records() harvest_source.report() From 2d1089aec8f6a25888162e2fa9646fc761a6c0db Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 16:14:47 -0600 Subject: [PATCH 5/6] correct token name --- .github/workflows/load_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml index a384d470..a4dc8094 100644 --- a/.github/workflows/load_test.yml +++ b/.github/workflows/load_test.yml @@ -37,7 +37,7 @@ jobs: SRC_OWNER_ORG: '82b85475-f85d-404a-b95b-89d1a42e9f6b' SRC_SOURCE_TYPE: 'datajson' CKAN_URL: ${{secrets.CKAN_URL}} - CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN}} + CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN_STAGING}} run: | source .venv/bin/activate poetry run python scripts/load_test.py From fa3079bfc202ccf31f5b4fd3bfe407421f308b75 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 1 Feb 2024 17:27:40 -0600 Subject: [PATCH 6/6] change token to staging --- .github/workflows/load_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/load_test.yml b/.github/workflows/load_test.yml index a4dc8094..bae7cc74 100644 --- a/.github/workflows/load_test.yml +++ b/.github/workflows/load_test.yml @@ -36,7 +36,7 @@ jobs: SRC_URL: 'https://pasteur.epa.gov/metadata.json' SRC_OWNER_ORG: '82b85475-f85d-404a-b95b-89d1a42e9f6b' SRC_SOURCE_TYPE: 'datajson' - CKAN_URL: ${{secrets.CKAN_URL}} + CKAN_URL: ${{secrets.CKAN_URL_STAGING}} CKAN_API_TOKEN: ${{secrets.CKAN_API_TOKEN_STAGING}} run: | source .venv/bin/activate