From ff2ec526fc7b0d3e4da960c90765f6bb656c9079 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 12 Sep 2024 11:41:42 -0500 Subject: [PATCH 1/5] adds pagination and count decorators for large endpoints; adds htmx and render_block libs to support re-rendering a partial template; adds USWDS pagination component and support templates; adds fixture json to support local dev and a CLI command to load testdata; --- app/__init__.py | 3 + app/forms.py | 5 - app/paginate.py | 28 +++ app/routes.py | 64 +++++- .../_scss/_uswds-theme-custom-styles.scss | 6 + app/static/package-lock.json | 6 + app/static/package.json | 1 + app/static/rollup.config.mjs | 1 + app/templates/base.html | 1 + .../components/pagination/pagination.html | 23 ++ .../pagination/pagination_arrow.html | 37 ++++ .../pagination/pagination_button.html | 23 ++ .../pagination/pagination_numbers.html | 65 ++++++ app/templates/view_job_data.html | 76 ++++--- app/templates/view_source_data.html | 3 +- database/interface.py | 147 +++++++++---- harvester/lib/cf_handler.py | 1 + poetry.lock | 36 +++- pyproject.toml | 4 +- requirements-dev.txt | 60 +++--- requirements.txt | 124 +++++------ tests/conftest.py | 203 +++++++++--------- tests/fixtures.json | 179 +++++++++++++++ tests/integration/app/test_load_manager.py | 9 +- tests/integration/database/test_db.py | 110 ++++++++-- tests/unit/test_pagination.py | 40 ++++ 26 files changed, 957 insertions(+), 298 deletions(-) create mode 100644 app/paginate.py create mode 100644 app/templates/components/pagination/pagination.html create mode 100644 app/templates/components/pagination/pagination_arrow.html create mode 100644 app/templates/components/pagination/pagination_button.html create mode 100644 app/templates/components/pagination/pagination_numbers.html create mode 100644 tests/fixtures.json create mode 100644 tests/unit/test_pagination.py diff --git a/app/__init__.py b/app/__init__.py index 159543c..2c9dc54 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -3,6 +3,7 @@ from dotenv import load_dotenv from flask import Flask from flask_bootstrap import Bootstrap +from flask_htmx import HTMX from flask_migrate import Migrate from app.filters import usa_icon @@ -19,6 +20,8 @@ def create_app(): app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False app.config["SECRET_KEY"] = os.getenv("FLASK_APP_SECRET_KEY") Bootstrap(app) + global htmx + htmx = HTMX(app) db.init_app(app) diff --git a/app/forms.py b/app/forms.py index 6d0ef98..281d1cd 100644 --- a/app/forms.py +++ b/app/forms.py @@ -35,11 +35,6 @@ class HarvestSourceForm(FlaskForm): choices=["manual", "daily", "weekly", "biweekly", "monthly"], validators=[DataRequired()], ) - size = SelectField( - "Size", - choices=["small", "medium", "large"], - validators=[DataRequired()], - ) schema_type = SelectField( "Schema Type", choices=["strict", "other"], validators=[DataRequired()] ) diff --git a/app/paginate.py b/app/paginate.py new file mode 100644 index 0000000..af291b7 --- /dev/null +++ b/app/paginate.py @@ -0,0 +1,28 @@ +import math + +from database.interface import PAGINATE_ENTRIES_PER_PAGE + + +class Pagination: + def __init__(self, current: int = 1, count: int = 1): + self.current = current + self.count = count + self.page_count = math.ceil(count / PAGINATE_ENTRIES_PER_PAGE) + self.per_page = PAGINATE_ENTRIES_PER_PAGE + + def to_dict(self): + return { + "current": self.current, + "count": self.count, + "page_count": self.page_count, + "page_label": "Page", + "per_page": self.per_page, + "next": {"label": "Next"}, + "previous": {"label": 
"Previous"}, + "last_item": { + "label": "Last page", + }, + } + + def update_current(self, current: int) -> dict: + self.current = int(current) diff --git a/app/routes.py b/app/routes.py index 49f78c3..891dbc9 100644 --- a/app/routes.py +++ b/app/routes.py @@ -12,11 +12,14 @@ from cryptography.hazmat.primitives.serialization import load_pem_private_key from dotenv import load_dotenv from flask import Blueprint, flash, redirect, render_template, request, session, url_for +from jinja2_fragments.flask import render_block from app.scripts.load_manager import schedule_first_job, trigger_manual_job from database.interface import HarvesterDBInterface +from . import htmx from .forms import HarvestSourceForm, OrganizationForm +from .paginate import Pagination logger = logging.getLogger("harvest_admin") @@ -24,6 +27,7 @@ mod = Blueprint("harvest", __name__) source = Blueprint("harvest_source", __name__) org = Blueprint("org", __name__) +testdata = Blueprint("testdata", __name__) db = HarvesterDBInterface() @@ -245,6 +249,31 @@ def cli_remove_harvest_source(id): print("Failed to delete harvest source") +## Load Test Data +# TODO move this into its own file when you break up routes +@testdata.cli.command("load") +def fixtures(): + """Load database fixtures from JSON.""" + import json + + file = "./tests/fixtures.json" + click.echo(f"Loading fixtures at `{file}`.") + with open(file, "r") as file: + fixture = json.load(file) + for item in fixture["organization"]: + db.add_organization(item) + for item in fixture["source"]: + db.add_harvest_source(item) + for item in fixture["job"]: + db.add_harvest_job(item) + for item in fixture["record"]: + db.add_harvest_record(item) + for item in fixture["record_error"]: + db.add_harvest_record_error(item) + + click.echo("Done.") + + # Helper Functions def make_new_source_contract(form): return { @@ -596,6 +625,32 @@ def add_harvest_job(): @mod.route("/harvest_job/", methods=["GET"]) @mod.route("/harvest_job/", methods=["GET"]) def get_harvest_job(job_id=None): + record_error_count = db.get_harvest_record_errors_by_job( + job_id, count=True, skip_pagination=True + ) + htmx_vars = { + "target_div": "#error_results_pagination", + "endpoint_url": f"/harvest_job/{job_id}", + } + + pagination = Pagination(count=record_error_count) + + if htmx: + page = request.args.get("page") + db_page = int(page) - 1 + record_errors = db.get_harvest_record_errors_by_job(job_id, page=db_page) + data = { + "harvest_job_id": job_id, + "record_errors": db._to_dict(record_errors), + "htmx_vars": htmx_vars, + } + pagination.update_current(page) + return render_block( + "view_job_data.html", + "record_errors_table", + data=data, + pagination=pagination.to_dict(), + ) if job_id: job = db.get_harvest_job(job_id) record_errors = db.get_harvest_record_errors_by_job(job_id) @@ -603,11 +658,15 @@ def get_harvest_job(job_id=None): return db._to_dict(job) if job else ("Not Found", 404) else: data = { + "harvest_job_id": job_id, "harvest_job": job, "harvest_job_dict": db._to_dict(job), "record_errors": db._to_dict(record_errors), + "htmx_vars": htmx_vars, } - return render_template("view_job_data.html", data=data) + return render_template( + "view_job_data.html", data=data, pagination=pagination.to_dict() + ) source_id = request.args.get("harvest_source_id") if source_id: @@ -667,7 +726,7 @@ def get_harvest_record(record_id=None): return "No harvest records found for this harvest source", 404 else: # TODO for test, will remove later - record = db.get_all_harvest_records() + record = 
db.pget_harvest_records() return db._to_dict(record) @@ -744,3 +803,4 @@ def register_routes(app): app.register_blueprint(user) app.register_blueprint(org) app.register_blueprint(source) + app.register_blueprint(testdata) diff --git a/app/static/_scss/_uswds-theme-custom-styles.scss b/app/static/_scss/_uswds-theme-custom-styles.scss index 8a739e2..09c28e9 100644 --- a/app/static/_scss/_uswds-theme-custom-styles.scss +++ b/app/static/_scss/_uswds-theme-custom-styles.scss @@ -56,3 +56,9 @@ ul.menu { .usa-card__img img { padding: 10px; } + +.usa-pagination { + &__item { + cursor: pointer; + } +} diff --git a/app/static/package-lock.json b/app/static/package-lock.json index 0a11864..49c78c4 100644 --- a/app/static/package-lock.json +++ b/app/static/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "@uswds/uswds": "3.8.0", "chart.js": "^4.4.2", + "htmx.org": "^2.0.2", "rollup": "^4.18.0" }, "devDependencies": { @@ -4408,6 +4409,11 @@ "entities": "^4.4.0" } }, + "node_modules/htmx.org": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/htmx.org/-/htmx.org-2.0.2.tgz", + "integrity": "sha512-eUPIpQaWKKstX393XNCRCMJTrqPzikh36Y9RceqsUZLTtlFjFaVDgwZLUsrFk8J2uzZxkkfiy0TE359j2eN6hA==" + }, "node_modules/iconv-lite": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", diff --git a/app/static/package.json b/app/static/package.json index 19b5019..cf26c1f 100644 --- a/app/static/package.json +++ b/app/static/package.json @@ -25,6 +25,7 @@ "dependencies": { "@uswds/uswds": "3.8.0", "chart.js": "^4.4.2", + "htmx.org": "^2.0.2", "rollup": "^4.18.0" }, "devDependencies": { diff --git a/app/static/rollup.config.mjs b/app/static/rollup.config.mjs index 964a928..9f0af73 100644 --- a/app/static/rollup.config.mjs +++ b/app/static/rollup.config.mjs @@ -8,6 +8,7 @@ export default { targets: [ { src: './node_modules/chart.js/dist/chart.umd.js', dest: './assets/chartjs/' }, { src: './node_modules/chart.js/dist/chart.umd.js.map', dest: './assets/chartjs/' }, + { src: './node_modules/htmx.org/dist/htmx.min.js', dest: './assets/htmx/' }, ] }) ] diff --git a/app/templates/base.html b/app/templates/base.html index 72859d9..ec55345 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -8,6 +8,7 @@ + {% block script_head %} {% endblock %} diff --git a/app/templates/components/pagination/pagination.html b/app/templates/components/pagination/pagination.html new file mode 100644 index 0000000..a2cd88b --- /dev/null +++ b/app/templates/components/pagination/pagination.html @@ -0,0 +1,23 @@ +{% from 'components/pagination/pagination_arrow.html' import pagination_arrow %} + +{% set overflow %} +
  • + + … + +
  • +{% endset %} + + diff --git a/app/templates/components/pagination/pagination_arrow.html b/app/templates/components/pagination/pagination_arrow.html new file mode 100644 index 0000000..7292f4a --- /dev/null +++ b/app/templates/components/pagination/pagination_arrow.html @@ -0,0 +1,37 @@ +{# + The full pagination data object is passed so we can access current state, aria labels, and text labels. + #} + {% macro pagination_arrow(direction, pagination, htmx_vars) %} + {% set page_var = ((pagination.current - 1) if direction == 'previous' else (pagination.current + 1)) | string() %} + {% set placeholder_link = htmx_vars.endpoint_url + "?page=" + page_var %} + + {% set link_attrs = { + 'class': 'usa-pagination__link usa-pagination__' ~ direction ~ '-page', + 'aria_label': pagination[direction]['label'] ~ ' ' ~ pagination.page_label | lower + } %} + +
  • + + {% if direction == 'previous' %} + + {% endif %} + + {{ pagination[direction]['label'] }} + + {% if direction == 'next' %} + + {% endif %} + +
  • + {% endmacro %} diff --git a/app/templates/components/pagination/pagination_button.html b/app/templates/components/pagination/pagination_button.html new file mode 100644 index 0000000..e2c8544 --- /dev/null +++ b/app/templates/components/pagination/pagination_button.html @@ -0,0 +1,23 @@ +{% macro pagination_button(item, pager_opts, htmx_vars) %} + {% set is_current = (item == pager_opts.current) %} + {% set is_last = (item == pager_opts.total) %} + {% set labels = pager_opts.aria_labels %} + {# HTMX page vars#} + {% set item_str = item | string() %} + {% set placeholder_link = htmx_vars.endpoint_url + "?page=" + item_str %} + + {# Display: "Last page, page X" if last item. Otherwise "Page X" #} + {% set aria_label = (labels.last ~ " " ~ labels.page_label | lower if is_last else labels.page_label) ~ " " ~ item %} + +
  • + {# Global variable placeholder_link doesn't work for some reason. #} + + {{ item }} + +
  • +{% endmacro %} diff --git a/app/templates/components/pagination/pagination_numbers.html b/app/templates/components/pagination/pagination_numbers.html new file mode 100644 index 0000000..fb84cb1 --- /dev/null +++ b/app/templates/components/pagination/pagination_numbers.html @@ -0,0 +1,65 @@ +{% from "components/pagination/pagination_button.html" import pagination_button %} + +{# Add +1 to first_five / last_five due to how ranges work in jinja #} +{% set pager_ranges = { + 'default': range(pagination.current - 1, pagination.current + 1), + 'last_item': pagination.page_count, + 'first_five': range(1, 5 + 1), + 'last_five': range(pagination.page_count - 4, pagination.page_count + 1), + } +%} + +{% set pager_button_opts = { + 'current': pagination.current, + 'total': pagination.page_count, + 'aria_labels': { + 'page_label': pagination.page_label, + 'previous': pagination.previous.label, + 'next': pagination.next.label, + 'last': pagination.last_item.label + } +} %} + +{# Page numbers #} +{# List all items if less than 7 #} +{% if pagination.page_count <= 7 %} + {% for item in range(1, pagination.page_count) %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} +{# User is at the start of a long dataset #} +{# Example: 1, 2, 3, *4*, 5 … 8 #} +{# Doesn't apply when user gets to 5 of 8 #} +{% elif pagination.current <= 4 and pagination.page_count >= 7 %} + {% for item in pager_ranges.first_five %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} + + {{ overflow | trim | safe }} + + {{ pagination_button(pager_ranges.last_item, pager_button_opts, data.htmx_vars) }} + +{# When user is close to the end of dataset #} +{# Example: 1 … 4, *5*, 6, 7, 8 #} +{% elif pagination.current >= pagination.page_count - 3 %} + {{ pagination_button(1, pager_button_opts, data.htmx_vars) }} + + {{ overflow | trim | safe }} + {% for item in pager_ranges.last_five %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} +{# Default case: Current - 1, Current, Current + 1 #} +{# Example: 1 … 21, *22*, 23 … 50 #} +{# Example: 1 … 4, *5*, 6 … 9 #} +{% else %} + {{ pagination_button(1, pager_button_opts, data.htmx_vars) }} + + {{ overflow | trim | safe }} + + {% for item in pager_ranges.default %} + {{ pagination_button(item, pager_button_opts, data.htmx_vars) }} + {% endfor %} + + {{ overflow | trim | safe }} + + {{ pagination_button(pager_ranges.last_item, pager_button_opts, data.htmx_vars) }} +{% endif %} diff --git a/app/templates/view_job_data.html b/app/templates/view_job_data.html index 4b5a168..718bb14 100644 --- a/app/templates/view_job_data.html +++ b/app/templates/view_job_data.html @@ -6,11 +6,11 @@ {% block content %}
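The Jinja in pagination_numbers.html above decides which page buttons to draw. A rough Python sketch of that selection logic, following the worked examples in the template's comments (the function name and the "..." gap marker are illustrative only, not part of the patch):

def visible_pages(current, page_count):
    # Page buttons to render; "..." stands for the USWDS overflow slot.
    if page_count <= 7:
        # Short result sets: list every page.
        return list(range(1, page_count + 1))
    if current <= 4:
        # Near the start, e.g. 1, 2, 3, *4*, 5 ... 8
        return [*range(1, 6), "...", page_count]
    if current >= page_count - 3:
        # Near the end, e.g. 1 ... 4, *5*, 6, 7, 8
        return [1, "...", *range(page_count - 4, page_count + 1)]
    # Default window, e.g. 1 ... 21, *22*, 23 ... 50
    return [1, "...", current - 1, current, current + 1, "...", page_count]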
- {% if not data.harvest_job.id %}
+ {% if not data.harvest_job_id %}

    Whooops!

    Looks like you navigated to a job that doesn't exist.

    {% else %} -

    Details for Harvest Job Id: {{data.harvest_job.id}}

    +

    Details for Harvest Job Id: {{data.harvest_job_id}}

    For Harvest Source Id: {{data.harvest_job.harvest_source_id}} @@ -34,7 +34,7 @@

    Job Error Table

    {% else %}
    - + @@ -64,32 +64,50 @@

    Record Error Table

    No record errors found {% else %}
    -
    Harvest Job Errors for {{data.harvest_job.id}} Harvest Job Errors for {{data.harvest_job_id}}
    Date Created
    - - - - - - - - - - - - {% for errors in data.record_errors %} - - - - - - - - {% endfor %} - -
    Harvest Error Info for {{data.harvest_job.id}}
Date Created | Id | Harvest Record Id | Message | Type
    {{errors.date_created}}{{errors.id}} - - {{errors.harvest_record_id}} {{errors.message}}{{errors.type}}
    + {% block record_errors_table %} +
    + + + + + + + + + + + + + + {% for errors in data.record_errors %} + + + + + + + + {% endfor %} + {% if pagination.per_page > data.record_errors|count and pagination.count > data.record_errors|count %} + {% for number in range(pagination.per_page - data.record_errors|count) %} + + {% for number in range(5) %} + + {% endfor %} + + {% endfor %} + {% endif %} + +
    Harvest Record Errors for {{data.harvest_job_id}}
Date Created | Id | Harvest Record Id | Message | Type
    {{errors.date_created}}{{errors.id}} + + {{errors.harvest_record_id}} {{errors.message}}{{errors.type}}
     
    + {% if pagination.count > data.record_errors|count %} + {% include '/components/pagination/pagination.html' %} + {%endif%} +
    + + {% endblock %}
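The record_errors_table block above is the fragment the pager re-requests. A minimal sketch of the round trip with Flask's test client, assuming a configured app and database and reusing the job id from tests/fixtures.json (HX-Request is the header flask-htmx keys off):

from app import create_app

client = create_app().test_client()
job_id = "6bce761c-7a39-41c1-ac73-94234c139c76"  # job id from tests/fixtures.json

# Plain navigation: the route renders the full view_job_data.html page, with
# Pagination(count=...) sizing the USWDS pager.
full_page = client.get(f"/harvest_job/{job_id}")

# Pager click: htmx sends HX-Request, so the route returns only the
# record_errors_table block via render_block(), and htmx swaps it into the
# #error_results_pagination container named in htmx_vars.
fragment = client.get(
    f"/harvest_job/{job_id}?page=2",
    headers={"HX-Request": "true"},
)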
    {% endif %} diff --git a/app/templates/view_source_data.html b/app/templates/view_source_data.html index 09da4bc..2ad8f8c 100644 --- a/app/templates/view_source_data.html +++ b/app/templates/view_source_data.html @@ -1,7 +1,8 @@ {% extends 'base.html' %} {% block script_head %} - + + {% endblock %} {% block title %} diff --git a/database/interface.py b/database/interface.py index 405eadc..47bc363 100644 --- a/database/interface.py +++ b/database/interface.py @@ -1,8 +1,9 @@ import os import uuid from datetime import datetime, timezone +from functools import wraps -from sqlalchemy import create_engine, inspect, or_, text +from sqlalchemy import create_engine, func, inspect, or_, select, text from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import scoped_session, sessionmaker from ckanapi import RemoteCKAN @@ -21,6 +22,41 @@ ) DATABASE_URI = os.getenv("DATABASE_URI") +PAGINATE_ENTRIES_PER_PAGE = 20 +PAGINATE_START_PAGE = 0 + + +def paginate(fn): + @wraps(fn) + def _impl(self, *args, **kwargs): + query = fn(self, *args, **kwargs) + if kwargs.get("skip_pagination") is True: + return query + elif kwargs.get("paginate") is False: + return query.all() + else: + per_page = kwargs.get("per_page") or PAGINATE_ENTRIES_PER_PAGE + page = kwargs.get("page") or PAGINATE_START_PAGE + query = query.limit(per_page) + query = query.offset(page * per_page) + return query.all() + + return _impl + + +def count(fn): + @wraps(fn) + def _impl(self, *args, **kwargs): + query = fn(self, *args, **kwargs) + if kwargs.get("count") is True: + count_q = query.statement.with_only_columns(*[func.count()]).order_by(None) + count = query.session.execute(count_q).scalar() + return count + else: + return query + + return _impl + logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -158,13 +194,15 @@ def clear_harvest_source(self, source_id): # delete all HarvestRecords and related HarvestRecordErrors def _clear_harvest_records(): - self.db.query(HarvestRecordError).filter( - HarvestRecordError.harvest_record_id.in_( - self.db.query(HarvestRecord.id).filter_by(harvest_source_id=source_id) + self.db.query(HarvestRecordError).filter( + HarvestRecordError.harvest_record_id.in_( + self.db.query(HarvestRecord.id).filter_by( + harvest_source_id=source_id ) - ).delete(synchronize_session=False) - self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).delete() - self.db.commit() + ) + ).delete(synchronize_session=False) + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).delete() + self.db.commit() source = self.db.get(HarvestSource, source_id) if source is None: @@ -173,50 +211,53 @@ def _clear_harvest_records(): organization_id = source.organization_id records = ( - self.db.query(HarvestRecord) - .filter_by(harvest_source_id=source_id).all() + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).all() ) if not records: return "Harvest source has no records to clear." ckan_ids = [record.ckan_id for record in records if record.ckan_id is not None] - error_records = [record for record in records if record.status == 'error'] + error_records = [record for record in records if record.status == "error"] jobs_in_progress = self.get_all_harvest_jobs_by_filter( {"harvest_source_id": source.id, "status": "in_progress"} ) # Ensure no jobs are in progress if jobs_in_progress: - return ("Error: A harvest job is currently in progress. " - "Cannot clear datasets.") + return ( + "Error: A harvest job is currently in progress. " + "Cannot clear datasets." 
+ ) # Ensure (error_records + ckan_ids) = total records if len(error_records) + len(ckan_ids) != len(records): - return ("Error: Not all records are either in an error state " - "or have a CKAN ID. Cannot proceed without clearing the dataset.") + return ( + "Error: Not all records are either in an error state " + "or have a CKAN ID. Cannot proceed without clearing the dataset." + ) - if not ckan_ids: + if not ckan_ids: _clear_harvest_records() return "Harvest source cleared successfully." - ckan = RemoteCKAN( - os.getenv("CKAN_API_URL"), apikey=os.getenv("CKAN_API_TOKEN") - ) + ckan = RemoteCKAN(os.getenv("CKAN_API_URL"), apikey=os.getenv("CKAN_API_TOKEN")) - result = ckan.action.package_search(fq=f'owner_org:{organization_id}') - ckan_datasets = result['count'] + result = ckan.action.package_search(fq=f"owner_org:{organization_id}") + ckan_datasets = result["count"] start = datetime.now(timezone.utc) retry_count = 0 retry_max = 20 - # Retry loop to handle timeouts from cloud.gov and CKAN's Solr backend, + # Retry loop to handle timeouts from cloud.gov and CKAN's Solr backend, # ensuring datasets are cleared despite possible interruptions. while ckan_datasets > 0 and retry_count < retry_max: - result = ckan.action.package_search(fq=f'owner_org:{organization_id}') - ckan_datasets = result['count'] - logger.info(f"Attempt {retry_count + 1}: " - f"{ckan_datasets} datasets remaining in CKAN") + result = ckan.action.package_search(fq=f"owner_org:{organization_id}") + ckan_datasets = result["count"] + logger.info( + f"Attempt {retry_count + 1}: " + f"{ckan_datasets} datasets remaining in CKAN" + ) try: ckan.action.bulk_update_delete( datasets=ckan_ids, org_id=organization_id @@ -237,8 +278,10 @@ def _clear_harvest_records(): logger.info(f"Total time: {datetime.now(timezone.utc) - start}") return "Harvest source cleared successfully." else: - fail_message = (f"Harvest source clearance failed after {retry_count} " - f"attempts. {ckan_datasets} datasets still exist in CKAN.") + fail_message = ( + f"Harvest source clearance failed after {retry_count} " + f"attempts. {ckan_datasets} datasets still exist in CKAN." + ) logger.error(fail_message) return fail_message @@ -248,18 +291,18 @@ def delete_harvest_source(self, source_id): return "Harvest source not found" records = ( - self.db.query(HarvestRecord) - .filter_by(harvest_source_id=source_id).all() + self.db.query(HarvestRecord).filter_by(harvest_source_id=source_id).all() ) - + if len(records) == 0: self.db.delete(source) self.db.commit() return "Harvest source deleted successfully" else: - return (f"Failed: {len(records)} records in the Harvest source, " - "please Clear it first.") - + return ( + f"Failed: {len(records)} records in the Harvest source, " + "please Clear it first." 
+ ) ## HARVEST JOB def add_harvest_job(self, job_data): @@ -372,9 +415,18 @@ def get_harvest_job_errors_by_job(self, job_id: str) -> list[dict]: job = self.get_harvest_job(job_id) return [error for error in job.errors or []] - def get_harvest_record_errors_by_job(self, job_id: str): - job = self.get_harvest_job(job_id) - return [error for record in job.records or [] for error in record.errors or []] + @count + @paginate + def get_harvest_record_errors_by_job(self, job_id: str, **kwargs): + subquery = ( + self.db.query(HarvestRecord.id) + .filter(HarvestRecord.status == "error") + .subquery() + ) + query = self.db.query(HarvestRecordError).filter( + HarvestRecordError.harvest_record_id.in_(select(subquery)) + ) + return query def get_harvest_error(self, error_id: str) -> dict: job_query = self.db.query(HarvestJobError).filter_by(id=error_id).first() @@ -387,7 +439,6 @@ def get_harvest_error(self, error_id: str) -> dict: return None def get_harvest_record_errors_by_record(self, record_id: str): - # TODO: paginate this errors = self.db.query(HarvestRecordError).filter_by( harvest_record_id=record_id ) @@ -552,6 +603,24 @@ def verify_user(self, usr_data): print("Error:", e) return False + #### PAGINATED QUERIES + + @paginate + def pget_harvest_jobs(self, filter=text(""), **kwargs): + return self.db.query(HarvestJob).filter(filter) + + @paginate + def pget_harvest_records(self, filter=text(""), **kwargs): + return self.db.query(HarvestRecord).filter(filter) + + @paginate + def pget_harvest_job_errors(self, filter=text(""), **kwargs): + return self.db.query(HarvestJobError).filter(filter) + + @paginate + def pget_harvest_record_errors(self, filter=text(""), **kwargs): + return self.db.query(HarvestRecordError).filter(filter) + ##### TEST INTERFACES BELOW ##### ######## TO BE REMOVED ########## def get_all_harvest_jobs(self): @@ -559,10 +628,6 @@ def get_all_harvest_jobs(self): harvest_jobs_data = [job for job in harvest_jobs] return harvest_jobs_data - def get_all_harvest_records(self): - harvest_records = self.db.query(HarvestRecord).all() - return [record for record in harvest_records or []] - def get_all_harvest_errors(self): job_errors = self.db.query(HarvestJobError).all() record_errors = self.db.query(HarvestRecordError).all() diff --git a/harvester/lib/cf_handler.py b/harvester/lib/cf_handler.py index f5d2eef..a19b7c9 100644 --- a/harvester/lib/cf_handler.py +++ b/harvester/lib/cf_handler.py @@ -1,4 +1,5 @@ import os + from cloudfoundry_client.client import CloudFoundryClient from cloudfoundry_client.v3.tasks import TaskManager diff --git a/poetry.lock b/poetry.lock index 2026c48..8022876 100644 --- a/poetry.lock +++ b/poetry.lock @@ -769,6 +769,20 @@ dominate = "*" Flask = ">=0.8" visitor = "*" +[[package]] +name = "flask-htmx" +version = "0.3.2" +description = "A Flask extension to work with HTMX." +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "flask_htmx-0.3.2-py3-none-any.whl", hash = "sha256:a1e0071216349197d6669662c2c35a1ab849b6fca28c89dc90932761f7e73c05"}, + {file = "flask_htmx-0.3.2.tar.gz", hash = "sha256:8def77bb292369ff77513ff7b76d27b06f83e1d8c21165b6714c06c1cc2b9275"}, +] + +[package.dependencies] +Flask = ">=2.0.2,<4.0.0" + [[package]] name = "flask-migrate" version = "4.0.7" @@ -1068,6 +1082,24 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jinja2-fragments" +version = "1.6.0" +description = "Render Jinja2 template block as HTML page fragments on Python web frameworks." 
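For reference, the stacked @count / @paginate decorators added to database/interface.py above are driven purely by keyword arguments. A short usage sketch (the job id is the one from tests/fixtures.json; twenty rows per page comes from PAGINATE_ENTRIES_PER_PAGE):

from database.interface import HarvesterDBInterface

db = HarvesterDBInterface()
job_id = "6bce761c-7a39-41c1-ac73-94234c139c76"  # job id from tests/fixtures.json

# count=True makes @count rewrite the query as SELECT COUNT(*) and return an
# int; skip_pagination=True tells @paginate to pass the bare query through.
total_errors = db.get_harvest_record_errors_by_job(
    job_id, count=True, skip_pagination=True
)

# Default path: @paginate applies .limit(20).offset(page * 20) and returns rows.
first_page = db.get_harvest_record_errors_by_job(job_id, page=0)
second_page = db.get_harvest_record_errors_by_job(job_id, page=1)

# paginate=False opts out of paging entirely and returns every matching row.
all_records = db.pget_harvest_records(paginate=False)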
+optional = false +python-versions = "*" +files = [ + {file = "jinja2_fragments-1.6.0-py3-none-any.whl", hash = "sha256:49a5815cd7210edf234bf137b6005abf1ad336ff93c65519deddc41d91713986"}, + {file = "jinja2_fragments-1.6.0.tar.gz", hash = "sha256:5bd7dd49b7dbfa174d45d6a991cfe3cba4a08a4b66f87cf248aa739eebc435f8"}, +] + +[package.dependencies] +jinja2 = ">=3.1.0" + +[package.extras] +dev = ["pre-commit"] +tests = ["fastapi", "flask (>=2.1.0)", "litestar[standard]", "pytest", "pytest-asyncio", "quart (>=0.18.0)", "sanic", "sanic-ext", "sanic-testing", "starlette[full]"] + [[package]] name = "jsonschema" version = "4.21.1" @@ -2347,5 +2379,5 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" -python-versions = ">=3.10" -content-hash = "d81c4af263f6891c477453ddc74ed90cc7da9d3cdac3e945c1ced15930a757ff" +python-versions = ">=3.10,<4.0" +content-hash = "998c720a492a425387d626eaa210b0c479e6683e2a977c0f6f598c239dcb2df6" diff --git a/pyproject.toml b/pyproject.toml index bbb3c66..6f7c32c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ license = "LICENSE.md" # TODO fix me repository = "https://github.com/GSA/datagov-harvester" [tool.poetry.dependencies] -python = ">=3.10" +python = ">=3.10,<4.0" jsonschema = ">=4" python-dotenv = ">=1" deepdiff = ">=6" @@ -32,6 +32,8 @@ cloudfoundry-client = "^1.36.0" pyjwt = "^2.8.0" cryptography = "^42.0.8" boltons = "^24.0.0" +jinja2-fragments = "^1.6.0" +flask-htmx = "^0.3.2" [tool.poetry.group.dev.dependencies] pytest = ">=7.3.2" diff --git a/requirements-dev.txt b/requirements-dev.txt index 08a8dad..256fac5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,30 +1,30 @@ -black==24.4.2 ; python_version >= "3.10" -cfgv==3.4.0 ; python_version >= "3.10" -click==8.1.7 ; python_version >= "3.10" -colorama==0.4.6 ; python_version >= "3.10" and (sys_platform == "win32" or platform_system == "Windows") -coverage[toml]==7.5.0 ; python_version >= "3.10" -debugpy==1.8.1 ; python_version >= "3.10" -distlib==0.3.8 ; python_version >= "3.10" -exceptiongroup==1.2.1 ; python_version < "3.11" and python_version >= "3.10" -filelock==3.14.0 ; python_version >= "3.10" -freezegun==1.5.1 ; python_version >= "3.10" -identify==2.5.36 ; python_version >= "3.10" -iniconfig==2.0.0 ; python_version >= "3.10" -isort==5.13.2 ; python_version >= "3.10" -mypy-extensions==1.0.0 ; python_version >= "3.10" -nodeenv==1.8.0 ; python_version >= "3.10" -packaging==24.0 ; python_version >= "3.10" -pathspec==0.12.1 ; python_version >= "3.10" -platformdirs==4.2.1 ; python_version >= "3.10" -pluggy==1.5.0 ; python_version >= "3.10" -pre-commit==3.7.1 ; python_version >= "3.10" -pytest-cov==4.1.0 ; python_version >= "3.10" -pytest==8.1.1 ; python_version >= "3.10" -python-dateutil==2.9.0.post0 ; python_version >= "3.10" -pyyaml==6.0.1 ; python_version >= "3.10" -ruff==0.0.291 ; python_version >= "3.10" -setuptools==69.5.1 ; python_version >= "3.10" -six==1.16.0 ; python_version >= "3.10" -tomli==2.0.1 ; python_full_version <= "3.11.0a6" and python_version >= "3.10" -typing-extensions==4.11.0 ; python_version < "3.11" and python_version >= "3.10" -virtualenv==20.26.2 ; python_version >= "3.10" +black==24.4.2 ; python_version >= "3.10" and python_version < "4.0" +cfgv==3.4.0 ; python_version >= "3.10" and python_version < "4.0" +click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and (sys_platform == "win32" or platform_system == "Windows") +coverage[toml]==7.5.0 ; python_version >= 
"3.10" and python_version < "4.0" +debugpy==1.8.1 ; python_version >= "3.10" and python_version < "4.0" +distlib==0.3.8 ; python_version >= "3.10" and python_version < "4.0" +exceptiongroup==1.2.1 ; python_version >= "3.10" and python_version < "3.11" +filelock==3.14.0 ; python_version >= "3.10" and python_version < "4.0" +freezegun==1.5.1 ; python_version >= "3.10" and python_version < "4.0" +identify==2.5.36 ; python_version >= "3.10" and python_version < "4.0" +iniconfig==2.0.0 ; python_version >= "3.10" and python_version < "4.0" +isort==5.13.2 ; python_version >= "3.10" and python_version < "4.0" +mypy-extensions==1.0.0 ; python_version >= "3.10" and python_version < "4.0" +nodeenv==1.8.0 ; python_version >= "3.10" and python_version < "4.0" +packaging==24.0 ; python_version >= "3.10" and python_version < "4.0" +pathspec==0.12.1 ; python_version >= "3.10" and python_version < "4.0" +platformdirs==4.2.1 ; python_version >= "3.10" and python_version < "4.0" +pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4.0" +pre-commit==3.7.1 ; python_version >= "3.10" and python_version < "4.0" +pytest-cov==4.1.0 ; python_version >= "3.10" and python_version < "4.0" +pytest==8.1.1 ; python_version >= "3.10" and python_version < "4.0" +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0" +ruff==0.0.291 ; python_version >= "3.10" and python_version < "4.0" +setuptools==69.5.1 ; python_version >= "3.10" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" +tomli==2.0.1 ; python_version >= "3.10" and python_full_version <= "3.11.0a6" +typing-extensions==4.11.0 ; python_version >= "3.10" and python_version < "3.11" +virtualenv==20.26.2 ; python_version >= "3.10" and python_version < "4.0" diff --git a/requirements.txt b/requirements.txt index 2a49425..de8125f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,63 @@ -aiohttp==3.9.5 ; python_version >= "3.10" -aiosignal==1.3.1 ; python_version >= "3.10" -alembic==1.13.1 ; python_version >= "3.10" -async-timeout==4.0.3 ; python_version < "3.11" and python_version >= "3.10" -attrs==23.2.0 ; python_version >= "3.10" -beautifulsoup4==4.12.3 ; python_version >= "3.10" -blinker==1.7.0 ; python_version >= "3.10" -boltons==24.0.0 ; python_version >= "3.10" -certifi==2024.2.2 ; python_version >= "3.10" -cffi==1.16.0 ; platform_python_implementation != "PyPy" and python_version >= "3.10" -charset-normalizer==3.3.2 ; python_version >= "3.10" -ckanapi==4.8 ; python_version >= "3.10" -click==8.1.7 ; python_version >= "3.10" -cloudfoundry-client==1.36.0 ; python_version >= "3.10" -colorama==0.4.6 ; python_version >= "3.10" and platform_system == "Windows" -cryptography==42.0.8 ; python_version >= "3.10" -deepdiff==7.0.1 ; python_version >= "3.10" -docopt==0.6.2 ; python_version >= "3.10" -dominate==2.9.1 ; python_version >= "3.10" -flask-bootstrap==3.3.7.1 ; python_version >= "3.10" -flask-migrate==4.0.7 ; python_version >= "3.10" -flask-sqlalchemy==3.1.1 ; python_version >= "3.10" -flask-wtf==1.2.1 ; python_version >= "3.10" -flask==3.0.3 ; python_version >= "3.10" -frozenlist==1.4.1 ; python_version >= "3.10" -greenlet==3.0.3 ; (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") and python_version >= "3.10" -idna==3.7 ; python_version 
>= "3.10" -itsdangerous==2.2.0 ; python_version >= "3.10" -jinja2==3.1.3 ; python_version >= "3.10" -jsonschema-specifications==2023.12.1 ; python_version >= "3.10" -jsonschema==4.21.1 ; python_version >= "3.10" -mako==1.3.3 ; python_version >= "3.10" -markupsafe==2.1.5 ; python_version >= "3.10" -multidict==6.0.5 ; python_version >= "3.10" -oauth2-client==1.4.2 ; python_version >= "3.10" -ordered-set==4.1.0 ; python_version >= "3.10" -polling2==0.5.0 ; python_version >= "3.10" -protobuf==4.25.3 ; python_version >= "3.10" -psycopg2-binary==2.9.9 ; python_version >= "3.10" -pycparser==2.22 ; platform_python_implementation != "PyPy" and python_version >= "3.10" -pyjwt==2.8.0 ; python_version >= "3.10" -python-dotenv==1.0.1 ; python_version >= "3.10" -python-slugify==8.0.4 ; python_version >= "3.10" -pyyaml==6.0.1 ; python_version >= "3.10" -referencing==0.35.0 ; python_version >= "3.10" -requests==2.31.0 ; python_version >= "3.10" -rpds-py==0.18.0 ; python_version >= "3.10" -sansjson==0.3.0 ; python_version >= "3.10" -setuptools==69.5.1 ; python_version >= "3.10" -simplejson==3.19.2 ; python_version >= "3.10" -six==1.16.0 ; python_version >= "3.10" -soupsieve==2.5 ; python_version >= "3.10" -sqlalchemy==2.0.29 ; python_version >= "3.10" -text-unidecode==1.3 ; python_version >= "3.10" -typing-extensions==4.11.0 ; python_version >= "3.10" -urllib3==2.2.1 ; python_version >= "3.10" -visitor==0.1.3 ; python_version >= "3.10" -websocket-client==1.7.0 ; python_version >= "3.10" -werkzeug==3.0.2 ; python_version >= "3.10" -wtforms==3.1.2 ; python_version >= "3.10" -yarl==1.9.4 ; python_version >= "3.10" +aiohttp==3.9.5 ; python_version >= "3.10" and python_version < "4.0" +aiosignal==1.3.1 ; python_version >= "3.10" and python_version < "4.0" +alembic==1.13.1 ; python_version >= "3.10" and python_version < "4.0" +async-timeout==4.0.3 ; python_version >= "3.10" and python_version < "3.11" +attrs==23.2.0 ; python_version >= "3.10" and python_version < "4.0" +beautifulsoup4==4.12.3 ; python_version >= "3.10" and python_version < "4.0" +blinker==1.7.0 ; python_version >= "3.10" and python_version < "4.0" +boltons==24.0.0 ; python_version >= "3.10" and python_version < "4.0" +certifi==2024.2.2 ; python_version >= "3.10" and python_version < "4.0" +cffi==1.16.0 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy" +charset-normalizer==3.3.2 ; python_version >= "3.10" and python_version < "4.0" +ckanapi==4.8 ; python_version >= "3.10" and python_version < "4.0" +click==8.1.7 ; python_version >= "3.10" and python_version < "4.0" +cloudfoundry-client==1.36.0 ; python_version >= "3.10" and python_version < "4.0" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Windows" +cryptography==42.0.8 ; python_version >= "3.10" and python_version < "4.0" +deepdiff==7.0.1 ; python_version >= "3.10" and python_version < "4.0" +docopt==0.6.2 ; python_version >= "3.10" and python_version < "4.0" +dominate==2.9.1 ; python_version >= "3.10" and python_version < "4.0" +flask-bootstrap==3.3.7.1 ; python_version >= "3.10" and python_version < "4.0" +flask-htmx==0.3.2 ; python_version >= "3.10" and python_version < "4.0" +flask-migrate==4.0.7 ; python_version >= "3.10" and python_version < "4.0" +flask-sqlalchemy==3.1.1 ; python_version >= "3.10" and python_version < "4.0" +flask-wtf==1.2.1 ; python_version >= "3.10" and python_version < "4.0" +flask==3.0.3 ; python_version >= "3.10" and python_version < "4.0" +frozenlist==1.4.1 ; 
python_version >= "3.10" and python_version < "4.0" +greenlet==3.0.3 ; python_version >= "3.10" and python_version < "4.0" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32") +idna==3.7 ; python_version >= "3.10" and python_version < "4.0" +itsdangerous==2.2.0 ; python_version >= "3.10" and python_version < "4.0" +jinja2-fragments==1.6.0 ; python_version >= "3.10" and python_version < "4.0" +jinja2==3.1.3 ; python_version >= "3.10" and python_version < "4.0" +jsonschema-specifications==2023.12.1 ; python_version >= "3.10" and python_version < "4.0" +jsonschema==4.21.1 ; python_version >= "3.10" and python_version < "4.0" +mako==1.3.3 ; python_version >= "3.10" and python_version < "4.0" +markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "4.0" +multidict==6.0.5 ; python_version >= "3.10" and python_version < "4.0" +oauth2-client==1.4.2 ; python_version >= "3.10" and python_version < "4.0" +ordered-set==4.1.0 ; python_version >= "3.10" and python_version < "4.0" +polling2==0.5.0 ; python_version >= "3.10" and python_version < "4.0" +protobuf==4.25.3 ; python_version >= "3.10" and python_version < "4.0" +psycopg2-binary==2.9.9 ; python_version >= "3.10" and python_version < "4.0" +pycparser==2.22 ; python_version >= "3.10" and python_version < "4.0" and platform_python_implementation != "PyPy" +pyjwt==2.8.0 ; python_version >= "3.10" and python_version < "4.0" +python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" +python-slugify==8.0.4 ; python_version >= "3.10" and python_version < "4.0" +pyyaml==6.0.1 ; python_version >= "3.10" and python_version < "4.0" +referencing==0.35.0 ; python_version >= "3.10" and python_version < "4.0" +requests==2.31.0 ; python_version >= "3.10" and python_version < "4.0" +rpds-py==0.18.0 ; python_version >= "3.10" and python_version < "4.0" +sansjson==0.3.0 ; python_version >= "3.10" and python_version < "4.0" +setuptools==69.5.1 ; python_version >= "3.10" and python_version < "4.0" +simplejson==3.19.2 ; python_version >= "3.10" and python_version < "4.0" +six==1.16.0 ; python_version >= "3.10" and python_version < "4.0" +soupsieve==2.5 ; python_version >= "3.10" and python_version < "4.0" +sqlalchemy==2.0.29 ; python_version >= "3.10" and python_version < "4.0" +text-unidecode==1.3 ; python_version >= "3.10" and python_version < "4.0" +typing-extensions==4.11.0 ; python_version >= "3.10" and python_version < "4.0" +urllib3==2.2.1 ; python_version >= "3.10" and python_version < "4.0" +visitor==0.1.3 ; python_version >= "3.10" and python_version < "4.0" +websocket-client==1.7.0 ; python_version >= "3.10" and python_version < "4.0" +werkzeug==3.0.2 ; python_version >= "3.10" and python_version < "4.0" +wtforms==3.1.2 ; python_version >= "3.10" and python_version < "4.0" +yarl==1.9.4 ; python_version >= "3.10" and python_version < "4.0" diff --git a/tests/conftest.py b/tests/conftest.py index b55217f..12abf1f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,8 @@ +import json import logging import os from pathlib import Path -from typing import Any, Generator +from typing import Any, Generator, List from unittest.mock import patch import pytest @@ -80,34 +81,33 @@ def default_function_fixture(interface): logger.info("Patching complete. 
Unpatching") +@pytest.fixture +def fixtures_json(): + file = Path(__file__).parents[0] / "fixtures.json" + with open(file, "r") as file: + return json.load(file) + + ## ORGS @pytest.fixture -def organization_data() -> dict: - return { - "name": "Test Org", - "logo": "https://example.com/logo.png", - "id": "d925f84d-955b-4cb7-812f-dcfd6681a18f", - } +def organization_data(fixtures_json) -> dict: + return fixtures_json["organization"][0] @pytest.fixture -def organization_orm(organization_data: dict) -> Organization: +def organization_data_orm(organization_data: dict) -> Organization: return Organization(**organization_data) ## HARVEST SOURCES @pytest.fixture -def source_data_dcatus(organization_data: dict) -> dict: - return { - "id": "2f2652de-91df-4c63-8b53-bfced20b276b", - "name": "Test Source", - "notification_emails": "email@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus.json", - "schema_type": "type1", - "source_type": "dcatus", - } +def source_data_dcatus(fixtures_json) -> dict: + return fixtures_json["source"][0] + + +@pytest.fixture +def source_data_dcatus_orm(source_data_dcatus: dict) -> HarvestSource: + return HarvestSource(**source_data_dcatus) @pytest.fixture @@ -138,11 +138,6 @@ def source_data_dcatus_same_title(organization_data: dict) -> dict: } -@pytest.fixture -def source_orm_dcatus(source_data_dcatus: dict) -> HarvestSource: - return HarvestSource(**source_data_dcatus) - - @pytest.fixture def source_data_waf(organization_data: dict) -> dict: return { @@ -172,103 +167,82 @@ def source_data_dcatus_invalid(organization_data: dict) -> dict: @pytest.fixture -def job_data_dcatus(source_data_dcatus: dict) -> dict: - return { - "id": "6bce761c-7a39-41c1-ac73-94234c139c76", - "status": "new", - "harvest_source_id": source_data_dcatus["id"], - } - - -@pytest.fixture -def job_data_dcatus_2(source_data_dcatus: dict) -> dict: +def source_data_dcatus_single_record(organization_data: dict) -> dict: return { - "id": "392ac4b3-79a6-414b-a2b3-d6c607d3b8d4", - "status": "new", - "harvest_source_id": source_data_dcatus["id"], + "id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "name": "Single Record Test Source", + "notification_emails": "email@example.com", + "organization_id": organization_data["id"], + "frequency": "daily", + "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record.json", + "schema_type": "type1", + "source_type": "dcatus", } @pytest.fixture -def job_orm_dcatus(job_data_dcatus: dict) -> HarvestJob: - return HarvestJob(**job_data_dcatus) - - -@pytest.fixture -def job_data_waf(source_data_waf: dict) -> dict: +def source_data_dcatus_bad_url(organization_data: dict) -> dict: return { - "id": "963cdc51-94d5-425d-a688-e0a57e0c5dd2", - "status": "new", - "harvest_source_id": source_data_waf["id"], + "id": "b059e587-a4a1-422e-825a-830b4913dbfb", + "name": "Bad URL Source", + "notification_emails": "bad@example.com", + "organization_id": organization_data["id"], + "frequency": "daily", + "url": f"{HARVEST_SOURCE_URL}/dcatus/bad_url.json", + "schema_type": "type1", + "source_type": "dcatus", } @pytest.fixture -def job_error_data(job_data_dcatus) -> dict: +def source_data_dcatus_invalid_records(organization_data) -> dict: return { - "harvest_job_id": job_data_dcatus["id"], - "message": "error reading records from harvest database", - "type": "ExtractInternalException", + "id": "8e7f539b-0a83-43ad-950e-3976bb11a425", + "name": "Invalid Record Source", + "notification_emails": "invalid_record@example.com", 
+ "organization_id": organization_data["id"], + "frequency": "daily", + "url": "http://localhost/dcatus/missing_title.json", + "schema_type": "type1", + "source_type": "dcatus", } +## HARVEST JOBS @pytest.fixture -def job_data_dcatus_invalid(source_data_dcatus_invalid: dict) -> dict: - return { - "id": "59df7ba5-102d-4ae3-abd6-01b7eb26a338", - "status": "new", - "harvest_source_id": source_data_dcatus_invalid["id"], - } +def job_data_dcatus(fixtures_json) -> dict: + return fixtures_json["job"][0] -## HARVEST RECORDS @pytest.fixture -def source_data_dcatus_single_record(organization_data: dict) -> dict: - return { - "id": "2f2652de-91df-4c63-8b53-bfced20b276b", - "name": "Single Record Test Source", - "notification_emails": "email@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record.json", - "schema_type": "type1", - "source_type": "dcatus", - } +def job_data_dcatus_orm(job_data_dcatus: dict) -> HarvestJob: + return HarvestJob(**job_data_dcatus) @pytest.fixture -def record_data_dcatus(job_data_dcatus: dict) -> dict: +def job_data_dcatus_2(source_data_dcatus: dict) -> dict: return { - "id": "0779c855-df20-49c8-9108-66359d82b77c", - "identifier": "test_identifier", - "harvest_job_id": job_data_dcatus["id"], - "harvest_source_id": job_data_dcatus["harvest_source_id"], - "action": "create", - "status": "success", - "source_raw": "example data", + "id": "392ac4b3-79a6-414b-a2b3-d6c607d3b8d4", + "status": "new", + "harvest_source_id": source_data_dcatus["id"], } @pytest.fixture -def record_error_data(record_data_dcatus) -> dict: +def job_data_waf(source_data_waf: dict) -> dict: return { - "harvest_record_id": record_data_dcatus["id"], - "message": "record is invalid", - "type": "ValidationException", + "id": "963cdc51-94d5-425d-a688-e0a57e0c5dd2", + "status": "new", + "harvest_source_id": source_data_waf["id"], } @pytest.fixture -def source_data_dcatus_bad_url(organization_data: dict) -> dict: +def job_data_dcatus_invalid(source_data_dcatus_invalid: dict) -> dict: return { - "id": "b059e587-a4a1-422e-825a-830b4913dbfb", - "name": "Bad URL Source", - "notification_emails": "bad@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": f"{HARVEST_SOURCE_URL}/dcatus/bad_url.json", - "schema_type": "type1", - "source_type": "dcatus", + "id": "59df7ba5-102d-4ae3-abd6-01b7eb26a338", + "status": "new", + "harvest_source_id": source_data_dcatus_invalid["id"], } @@ -281,20 +255,6 @@ def job_data_dcatus_bad_url(source_data_dcatus_bad_url: dict) -> dict: } -@pytest.fixture -def source_data_dcatus_invalid_records(organization_data) -> dict: - return { - "id": "8e7f539b-0a83-43ad-950e-3976bb11a425", - "name": "Invalid Record Source", - "notification_emails": "invalid_record@example.com", - "organization_id": organization_data["id"], - "frequency": "daily", - "url": "http://localhost/dcatus/missing_title.json", - "schema_type": "type1", - "source_type": "dcatus", - } - - @pytest.fixture def source_data_dcatus_invalid_records_job( source_data_dcatus_invalid_records: dict, @@ -306,6 +266,24 @@ def source_data_dcatus_invalid_records_job( } +## HARVEST JOB ERRORS +@pytest.fixture +def job_error_data(fixtures_json) -> dict: + return fixtures_json["job_error"][0] + + +## HARVEST RECORDS +@pytest.fixture +def record_data_dcatus(fixtures_json) -> List[dict]: + return fixtures_json["record"] + + +## HARVEST RECORD ERRORS +@pytest.fixture +def record_error_data(fixtures_json) -> List[dict]: + 
return fixtures_json["record_error"] + + @pytest.fixture def interface_no_jobs(interface, organization_data, source_data_dcatus): interface.add_organization(organization_data) @@ -314,6 +292,25 @@ def interface_no_jobs(interface, organization_data, source_data_dcatus): return interface +@pytest.fixture +def interface_with_fixture_json( + interface_no_jobs, + job_data_dcatus, + job_error_data, + record_data_dcatus, + record_error_data, +): + interface_no_jobs.add_harvest_job(job_data_dcatus) + interface_no_jobs.add_harvest_job_error(job_error_data) + for record in record_data_dcatus: + interface_no_jobs.add_harvest_record(record) + for error in record_error_data: + interface_no_jobs.add_harvest_record_error(error) + + return interface_no_jobs + + +## MISC @pytest.fixture def interface_with_multiple_jobs(interface_no_jobs, source_data_dcatus): statuses = ["new", "in_progress", "complete", "error"] diff --git a/tests/fixtures.json b/tests/fixtures.json new file mode 100644 index 0000000..2cd74fb --- /dev/null +++ b/tests/fixtures.json @@ -0,0 +1,179 @@ +{ + "organization": [ + { + "name": "Test Org", + "logo": "https://example.com/logo.png", + "id": "d925f84d-955b-4cb7-812f-dcfd6681a18f" + } + ], + "source": [ + { + "id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "name": "Test Source", + "notification_emails": "email@example.com", + "organization_id": "d925f84d-955b-4cb7-812f-dcfd6681a18f", + "frequency": "daily", + "url": "http://localhost:80/dcatus/dcatus.json", + "schema_type": "type1", + "source_type": "dcatus" + } + ], + "job": [ + { + "id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "status": "new", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b" + } + ], + "job_error": [ + { + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "message": "error reading records from harvest database", + "type": "ExtractInternalException" + } + ], + "record": [ + { + "id": "0779c855-df20-49c8-9108-66359d82b77c", + "identifier": "test_identifier-1", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "c218c965-3670-45c8-bfcd-f852d71ed917", + "identifier": "test_identifier-2", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "e1f603cc-8b6b-483f-beb4-86bda5462b79", + "identifier": "test_identifier-3", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "1c004473-0802-4f22-a16d-7a2d7559719e", + "identifier": "test_identifier-4", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "deb12fa0-d812-4d6e-98f4-d4f7d776c6b3", + "identifier": "test_identifier-5", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "27b5d5d6-808b-4a8c-ae4a-99f118e282dd", + "identifier": "test_identifier-6", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": 
"2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "c232a2ca-6344-4692-adc2-29f618a2eff3", + "identifier": "test_identifier-7", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "95021355-bad0-442b-98e9-475ecd849033", + "identifier": "test_identifier-8", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "09f073b3-00e3-4147-ba69-a5d0fd7ce027", + "identifier": "test_identifier-9", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + }, + { + "id": "97492788-5d62-4feb-8641-6f6692aec026", + "identifier": "test_identifier-10", + "harvest_job_id": "6bce761c-7a39-41c1-ac73-94234c139c76", + "harvest_source_id": "2f2652de-91df-4c63-8b53-bfced20b276b", + "action": "create", + "status": "error", + "source_raw": "example data" + } + ], + "record_error": [ + { + "harvest_record_id": "0779c855-df20-49c8-9108-66359d82b77c", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "c218c965-3670-45c8-bfcd-f852d71ed917", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "e1f603cc-8b6b-483f-beb4-86bda5462b79", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "1c004473-0802-4f22-a16d-7a2d7559719e", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "deb12fa0-d812-4d6e-98f4-d4f7d776c6b3", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "27b5d5d6-808b-4a8c-ae4a-99f118e282dd", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "c232a2ca-6344-4692-adc2-29f618a2eff3", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "95021355-bad0-442b-98e9-475ecd849033", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "09f073b3-00e3-4147-ba69-a5d0fd7ce027", + "message": "record is invalid", + "type": "ValidationException" + }, + { + "harvest_record_id": "97492788-5d62-4feb-8641-6f6692aec026", + "message": "record is invalid", + "type": "ValidationException" + } + ] +} diff --git a/tests/integration/app/test_load_manager.py b/tests/integration/app/test_load_manager.py index 8252c31..91a2550 100644 --- a/tests/integration/app/test_load_manager.py +++ b/tests/integration/app/test_load_manager.py @@ -27,13 +27,18 @@ class TestLoadManager: @patch("harvester.lib.cf_handler.CloudFoundryClient") @patch("harvester.lib.cf_handler.TaskManager") def test_load_manager_invokes_tasks( - self, TMMock, CFCMock, interface_no_jobs, source_orm_dcatus, mock_good_cf_index + self, + TMMock, + CFCMock, + interface_no_jobs, + source_data_dcatus_orm, + mock_good_cf_index, ): intervals = [-1, -2] jobs = [ { "status": "new", - "harvest_source_id": source_orm_dcatus.id, + "harvest_source_id": source_data_dcatus_orm.id, "date_created": datetime.now() + timedelta(days=interval), } for interval in intervals diff --git 
a/tests/integration/database/test_db.py b/tests/integration/database/test_db.py index b93f01f..543ae39 100644 --- a/tests/integration/database/test_db.py +++ b/tests/integration/database/test_db.py @@ -1,6 +1,8 @@ import json from datetime import datetime, timezone +from sqlalchemy import text + from database.models import HarvestJobError, HarvestRecordError from harvester.harvest import HarvestSource from harvester.utils.general_utils import dataset_to_hash, sort_dataset @@ -141,6 +143,39 @@ def test_add_harvest_job_error( assert isinstance(harvest_job_error, HarvestJobError) assert harvest_job_error.message == job_error_data["message"] + db_harvest_job_error = interface.pget_harvest_job_errors( + filter=text(f"harvest_job_id = '{job_data_dcatus['id']}'") + ) + assert db_harvest_job_error[0].type == job_error_data["type"] + assert db_harvest_job_error[0].id == harvest_job_error.id + + def test_add_harvest_record_error( + self, + interface, + organization_data, + source_data_dcatus, + job_data_dcatus, + record_data_dcatus, + record_error_data, + ): + interface.add_organization(organization_data) + interface.add_harvest_source(source_data_dcatus) + interface.add_harvest_job(job_data_dcatus) + interface.add_harvest_record(record_data_dcatus[0]) + + harvest_record_error = interface.add_harvest_record_error(record_error_data[0]) + assert isinstance(harvest_record_error, HarvestRecordError) + assert harvest_record_error.message == record_error_data[0]["message"] + + harvest_record_error_from_db = interface.get_harvest_error( + harvest_record_error.id + ) + assert harvest_record_error.id == harvest_record_error_from_db.id + assert ( + harvest_record_error.harvest_record_id + == harvest_record_error_from_db.harvest_record_id + ) + def test_add_harvest_record( self, interface, @@ -153,39 +188,33 @@ def test_add_harvest_record( source = interface.add_harvest_source(source_data_dcatus) harvest_job = interface.add_harvest_job(job_data_dcatus) - record = interface.add_harvest_record(record_data_dcatus) + record = interface.add_harvest_record(record_data_dcatus[0]) assert record.harvest_source_id == source.id assert record.harvest_job_id == harvest_job.id - def test_add_harvest_record_error( + def test_add_harvest_records( self, interface, organization_data, source_data_dcatus, job_data_dcatus, record_data_dcatus, - record_error_data, ): interface.add_organization(organization_data) interface.add_harvest_source(source_data_dcatus) interface.add_harvest_job(job_data_dcatus) - interface.add_harvest_record(record_data_dcatus) - harvest_record_error = interface.add_harvest_record_error(record_error_data) - assert isinstance(harvest_record_error, HarvestRecordError) - assert harvest_record_error.message == record_error_data["message"] + for record in record_data_dcatus: + del record["id"] - harvest_record_error_from_db = interface.get_harvest_error( - harvest_record_error.id - ) - assert harvest_record_error.id == harvest_record_error_from_db.id - assert ( - harvest_record_error.harvest_record_id - == harvest_record_error_from_db.harvest_record_id - ) + id_lookup_table = interface.add_harvest_records(record_data_dcatus) + db_records = interface.pget_harvest_records() + assert len(id_lookup_table) == 10 + assert len(db_records) == 10 + assert id_lookup_table[db_records[0].identifier] == db_records[0].id - def test_add_harvest_records( + def test_endpoint_pagnation( self, interface, organization_data, @@ -198,18 +227,57 @@ def test_add_harvest_records( interface.add_harvest_job(job_data_dcatus) records = [] - 
for i in range(10): - new_record = record_data_dcatus.copy() + for i in range(100): + new_record = record_data_dcatus[0].copy() del new_record["id"] new_record["identifier"] = f"test-identifier-{i}" records.append(new_record) id_lookup_table = interface.add_harvest_records(records) - db_records = interface.get_all_harvest_records() - assert len(id_lookup_table) == 10 - assert len(db_records) == 10 + + # get first page + db_records = interface.pget_harvest_records(page=0) + assert len(db_records) == 20 + assert db_records[0].identifier == "test-identifier-0" + assert id_lookup_table[db_records[0].identifier] == db_records[0].id + + # get second page + db_records = interface.pget_harvest_records(page=1) + assert len(db_records) == 20 + assert db_records[0].identifier == "test-identifier-20" + assert id_lookup_table[db_records[0].identifier] == db_records[0].id + + # get first page again + db_records = interface.pget_harvest_records(page=0) + assert len(db_records) == 20 + assert db_records[0].identifier == "test-identifier-0" assert id_lookup_table[db_records[0].identifier] == db_records[0].id + # don't paginate via feature flag + db_records = interface.pget_harvest_records(paginate=False) + assert len(db_records) == 100 + assert id_lookup_table[db_records[50].identifier] == db_records[50].id + + # get page 6 (r. 100 - 119), which is out of bounds / empty + db_records = interface.pget_harvest_records(page=6) + assert len(db_records) == 0 + + db_records = interface.pget_harvest_records( + filter=text(f"id = '{id_lookup_table['test-identifier-0']}'") + ) + assert len(db_records) == 1 + assert db_records[0].harvest_job_id == job_data_dcatus["id"] + + def test_endpoint_count( + self, interface_with_fixture_json, job_data_dcatus, record_data_dcatus + ): + interface = interface_with_fixture_json + job_id = job_data_dcatus + count = interface.get_harvest_record_errors_by_job( + job_id, count=True, skip_pagination=True + ) + assert count == len(record_data_dcatus) + def test_add_harvest_job_with_id( self, interface, organization_data, source_data_dcatus, job_data_dcatus ): diff --git a/tests/unit/test_pagination.py b/tests/unit/test_pagination.py new file mode 100644 index 0000000..ca00ff7 --- /dev/null +++ b/tests/unit/test_pagination.py @@ -0,0 +1,40 @@ +from unittest.mock import patch + +from app.paginate import Pagination +from database.interface import PAGINATE_ENTRIES_PER_PAGE + + +class TestPagination: + def test_return_defaults(self): + pagination = Pagination() + assert pagination.current == 1 + assert pagination.count == 1 + assert pagination.page_count == 1 + assert pagination.per_page == PAGINATE_ENTRIES_PER_PAGE + + def test_return_default_dict(self): + pagination = Pagination() + + expected = { + "current": 1, + "count": 1, + "page_count": 1, + "page_label": "Page", + "per_page": PAGINATE_ENTRIES_PER_PAGE, + "next": {"label": "Next"}, + "previous": {"label": "Previous"}, + "last_item": {"label": "Last page"}, + } + assert expected == pagination.to_dict() + + def test_update_current(self): + pagination = Pagination() + assert pagination.current == 1 + pagination.update_current(12) + assert pagination.current == 12 + + @patch("app.paginate.PAGINATE_ENTRIES_PER_PAGE", 7) + def test_change_pagination_val(self): + pagination = Pagination(count=40) + assert pagination.count == 40 + assert pagination.page_count == 6 From ad018f1c74f6ff47c5efdbbb974b62575e24e7be Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 12 Sep 2024 12:10:43 -0500 Subject: [PATCH 2/5] fix test; plus lint --- 
app/routes.py | 3 ++- database/interface.py | 8 +++---- tests/integration/app/test_login_required.py | 24 +++++--------------- tests/integration/database/test_db.py | 10 +++++--- 4 files changed, 19 insertions(+), 26 deletions(-) diff --git a/app/routes.py b/app/routes.py index 891dbc9..38ff3b5 100644 --- a/app/routes.py +++ b/app/routes.py @@ -463,7 +463,7 @@ def view_harvest_source_data(source_id: str): jobs = db.get_all_harvest_jobs_by_filter({"harvest_source_id": source.id}) records = db.get_harvest_record_by_source(source.id) ckan_records = [record for record in records if record.ckan_id is not None] - error_records = [record for record in records if record.status == 'error'] + error_records = [record for record in records if record.status == "error"] jobs = db.get_all_harvest_jobs_by_filter({"harvest_source_id": source.id}) next_job = "N/A" future_jobs = db.get_new_harvest_jobs_by_source_in_future(source.id) @@ -585,6 +585,7 @@ def clear_harvest_source(source_id): flash("Failed to clear harvest source") return {"message": "failed"} + # Delete Source @mod.route("/harvest_source/config/delete/", methods=["POST"]) @login_required diff --git a/database/interface.py b/database/interface.py index 47bc363..ffc6b84 100644 --- a/database/interface.py +++ b/database/interface.py @@ -1,15 +1,15 @@ +import logging import os +import time import uuid from datetime import datetime, timezone from functools import wraps +import ckanapi +from ckanapi import RemoteCKAN from sqlalchemy import create_engine, func, inspect, or_, select, text from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import scoped_session, sessionmaker -from ckanapi import RemoteCKAN -import ckanapi -import time -import logging from .models import ( HarvestJob, diff --git a/tests/integration/app/test_login_required.py b/tests/integration/app/test_login_required.py index 60fe4e5..09af18e 100644 --- a/tests/integration/app/test_login_required.py +++ b/tests/integration/app/test_login_required.py @@ -56,9 +56,7 @@ def test_org_edit_buttons__logged_in( res = client.get(f"/organization/{organization_data['id']}") button_string_text = '
    ' org_edit_text = f' Date: Thu, 19 Sep 2024 13:01:21 -0500 Subject: [PATCH 3/5] fixes count decorator to work on queries where from is not supplied by ORM; fixes job errors subquery to filter by job --- database/interface.py | 14 ++++++- tests/conftest.py | 53 ++++++++++++++++++++++++++- tests/integration/database/test_db.py | 22 ++++++++++- 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/database/interface.py b/database/interface.py index ffc6b84..7bf2213 100644 --- a/database/interface.py +++ b/database/interface.py @@ -44,12 +44,18 @@ def _impl(self, *args, **kwargs): return _impl +# notes on the flag `maintain_column_froms`: +# https://github.com/sqlalchemy/sqlalchemy/discussions/6807#discussioncomment-1043732 +# docs: https://docs.sqlalchemy.org/en/14/core/selectable.html#sqlalchemy.sql.expression.Select.with_only_columns.params.maintain_column_froms +# def count(fn): @wraps(fn) def _impl(self, *args, **kwargs): query = fn(self, *args, **kwargs) if kwargs.get("count") is True: - count_q = query.statement.with_only_columns(*[func.count()]).order_by(None) + count_q = query.statement.with_only_columns( + func.count(), maintain_column_froms=True + ).order_by(None) count = query.session.execute(count_q).scalar() return count else: @@ -421,6 +427,7 @@ def get_harvest_record_errors_by_job(self, job_id: str, **kwargs): subquery = ( self.db.query(HarvestRecord.id) .filter(HarvestRecord.status == "error") + .filter(HarvestRecord.harvest_job_id == job_id) .subquery() ) query = self.db.query(HarvestRecordError).filter( @@ -604,19 +611,22 @@ def verify_user(self, usr_data): return False #### PAGINATED QUERIES - + @count @paginate def pget_harvest_jobs(self, filter=text(""), **kwargs): return self.db.query(HarvestJob).filter(filter) + @count @paginate def pget_harvest_records(self, filter=text(""), **kwargs): return self.db.query(HarvestRecord).filter(filter) + @count @paginate def pget_harvest_job_errors(self, filter=text(""), **kwargs): return self.db.query(HarvestJobError).filter(filter) + @count @paginate def pget_harvest_record_errors(self, filter=text(""), **kwargs): return self.db.query(HarvestRecordError).filter(filter) diff --git a/tests/conftest.py b/tests/conftest.py index 12abf1f..ccd6eb3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -220,11 +220,11 @@ def job_data_dcatus_orm(job_data_dcatus: dict) -> HarvestJob: @pytest.fixture -def job_data_dcatus_2(source_data_dcatus: dict) -> dict: +def job_data_dcatus_2(source_data_dcatus_2: dict) -> dict: return { "id": "392ac4b3-79a6-414b-a2b3-d6c607d3b8d4", "status": "new", - "harvest_source_id": source_data_dcatus["id"], + "harvest_source_id": source_data_dcatus_2["id"], } @@ -278,12 +278,43 @@ def record_data_dcatus(fixtures_json) -> List[dict]: return fixtures_json["record"] +@pytest.fixture +def record_data_dcatus_2(job_data_dcatus_2): + return [ + { + "id": "72bae4b2-336e-49df-bc4c-410dc73dc316", + "identifier": "test_identifier-2", + "harvest_job_id": job_data_dcatus_2["id"], + "harvest_source_id": job_data_dcatus_2["harvest_source_id"], + "action": "create", + "status": "error", + "source_raw": "example data 2", + } + ] + + ## HARVEST RECORD ERRORS @pytest.fixture def record_error_data(fixtures_json) -> List[dict]: return fixtures_json["record_error"] +@pytest.fixture +def record_error_data_2(record_data_dcatus_2) -> dict: + return [ + { + "harvest_record_id": record_data_dcatus_2[0]["id"], + "message": "record is invalid", + "type": "ValidationException", + }, + { + "harvest_record_id": 
record_data_dcatus_2[0]["id"], + "message": "record is invalid_2", + "type": "ValidationException", + }, + ] + + @pytest.fixture def interface_no_jobs(interface, organization_data, source_data_dcatus): interface.add_organization(organization_data) @@ -310,6 +341,24 @@ def interface_with_fixture_json( return interface_no_jobs +@pytest.fixture +def interface_with_multiple_sources( + interface_with_fixture_json, + source_data_dcatus_2, + job_data_dcatus_2, + record_data_dcatus_2, + record_error_data_2, +): + interface_with_fixture_json.add_harvest_source(source_data_dcatus_2) + interface_with_fixture_json.add_harvest_job(job_data_dcatus_2) + for record in record_data_dcatus_2: + interface_with_fixture_json.add_harvest_record(record) + for error in record_error_data_2: + interface_with_fixture_json.add_harvest_record_error(error) + + return interface_with_fixture_json + + ## MISC @pytest.fixture def interface_with_multiple_jobs(interface_no_jobs, source_data_dcatus): diff --git a/tests/integration/database/test_db.py b/tests/integration/database/test_db.py index 255f23f..d38e1f0 100644 --- a/tests/integration/database/test_db.py +++ b/tests/integration/database/test_db.py @@ -276,12 +276,32 @@ def test_endpoint_count( self, interface_with_fixture_json, job_data_dcatus, record_data_dcatus ): interface = interface_with_fixture_json - job_id = job_data_dcatus + job_id = job_data_dcatus["id"] count = interface.get_harvest_record_errors_by_job( job_id, count=True, skip_pagination=True ) assert count == len(record_data_dcatus) + def test_errors_by_job( + self, + interface_with_multiple_sources, + job_data_dcatus, + job_data_dcatus_2, + record_error_data, + record_error_data_2, + ): + interface = interface_with_multiple_sources + job_id = job_data_dcatus["id"] + count = interface.get_harvest_record_errors_by_job( + job_id, count=True, skip_pagination=True + ) + all_errors_count = interface.pget_harvest_record_errors( + count=True, + skip_pagination=True, + ) + assert count == len(record_error_data) + assert all_errors_count == len(record_error_data) + len(record_error_data_2) + def test_add_harvest_job_with_id( self, interface, organization_data, source_data_dcatus, job_data_dcatus ): From ae00cdfb8b5472c6cb051eba57e4d8b2f564c172 Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Thu, 19 Sep 2024 13:04:11 -0500 Subject: [PATCH 4/5] deletes redundant record from fixtures --- tests/conftest.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ccd6eb3..e95550a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -306,12 +306,7 @@ def record_error_data_2(record_data_dcatus_2) -> dict: "harvest_record_id": record_data_dcatus_2[0]["id"], "message": "record is invalid", "type": "ValidationException", - }, - { - "harvest_record_id": record_data_dcatus_2[0]["id"], - "message": "record is invalid_2", - "type": "ValidationException", - }, + } ] From a96ac3fa63ccc1247fdc6e3bf104a48c8d5c8e9b Mon Sep 17 00:00:00 2001 From: Tyler Burton Date: Mon, 23 Sep 2024 10:43:28 -0500 Subject: [PATCH 5/5] makes test data route more explicit; adds memory and instance vars to manifest --- app/routes.py | 2 +- manifest.yml | 3 ++- vars.development.yml | 2 ++ vars.prod.yml | 2 ++ vars.staging.yml | 2 ++ 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/app/routes.py b/app/routes.py index 38ff3b5..3a24133 100644 --- a/app/routes.py +++ b/app/routes.py @@ -251,7 +251,7 @@ def cli_remove_harvest_source(id): ## Load Test Data # TODO move this into its 
own file when you break up routes -@testdata.cli.command("load") +@testdata.cli.command("load_test_data") def fixtures(): """Load database fixtures from JSON.""" import json diff --git a/manifest.yml b/manifest.yml index 65114cf..e5c1587 100644 --- a/manifest.yml +++ b/manifest.yml @@ -8,7 +8,8 @@ applications: services: - ((app_name))-db - ((app_name))-secrets - instances: 1 + instances: ((admin_instances)) + memory: ((admin_memory_quota)) env: FLASK_APP: run.py CF_API_URL: ((CF_API_URL)) diff --git a/vars.development.yml b/vars.development.yml index 77c95c6..7cc130c 100644 --- a/vars.development.yml +++ b/vars.development.yml @@ -1,5 +1,7 @@ app_name: datagov-harvest route_external: datagov-harvest-admin-dev.app.cloud.gov +admin_instances: 2 +admin_memory_quota: 3G CF_API_URL: https://api.fr.cloud.gov CKAN_API_URL: https://catalog-next-dev-admin-datagov.app.cloud.gov HARVEST_RUNNER_APP_GUID: e6a8bba8-ed6d-4200-8280-67b46cebdc63 diff --git a/vars.prod.yml b/vars.prod.yml index fd800a5..17297e7 100644 --- a/vars.prod.yml +++ b/vars.prod.yml @@ -1,5 +1,7 @@ app_name: datagov-harvest route_external: datagov-harvest-admin-prod.app.cloud.gov +admin_instances: 2 +admin_memory_quota: 3G CF_API_URL: https://api.fr.cloud.gov CKAN_API_URL: https://catalog.data.gov HARVEST_RUNNER_APP_GUID: null diff --git a/vars.staging.yml b/vars.staging.yml index 3960ecd..f504eb3 100644 --- a/vars.staging.yml +++ b/vars.staging.yml @@ -1,5 +1,7 @@ app_name: datagov-harvest route_external: datagov-harvest-admin-stage.app.cloud.gov +admin_instances: 2 +admin_memory_quota: 3G CF_API_URL: https://api.fr.cloud.gov CKAN_API_URL: https://catalog-stage.data.gov HARVEST_RUNNER_APP_GUID: null
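The decorator changes in PATCH 3/5 and the paging behaviour asserted by the new integration tests can be hard to follow from the hunks alone. The sketch below shows how an `@count` / `@paginate` pair is expected to compose on the `pget_*` helpers: the `count` body mirrors the patched decorator (COUNT(*) over the query's own FROM/WHERE, preserved by `maintain_column_froms=True`), while the `paginate` body, the `Record` model, `ToyInterface`, and the 20-row page size are assumptions reconstructed from the tests, not the project's actual implementation.

from functools import wraps

from sqlalchemy import Column, Integer, String, create_engine, func, text
from sqlalchemy.orm import Session, declarative_base

# Assumed to match the 20-row pages asserted by the new pagination test.
PAGINATE_ENTRIES_PER_PAGE = 20

Base = declarative_base()


class Record(Base):
    # Illustrative stand-in for the project's HarvestRecord model.
    __tablename__ = "record"
    id = Column(Integer, primary_key=True)
    identifier = Column(String)
    status = Column(String)


def count(fn):
    # Mirrors the decorator as updated in PATCH 3/5: with count=True, keep the
    # query's FROM/WHERE but select only COUNT(*), then return the scalar.
    @wraps(fn)
    def _impl(self, *args, **kwargs):
        query = fn(self, *args, **kwargs)
        if kwargs.get("count") is True:
            count_q = query.statement.with_only_columns(
                func.count(), maintain_column_froms=True
            ).order_by(None)
            return query.session.execute(count_q).scalar()
        return query

    return _impl


def paginate(fn):
    # A reconstruction consistent with the tests: `page` is zero-based,
    # `paginate=False` returns every row, and `skip_pagination=True` hands the
    # un-evaluated query through so @count can still derive a total.
    @wraps(fn)
    def _impl(self, *args, **kwargs):
        query = fn(self, *args, **kwargs)
        if kwargs.get("skip_pagination") is True:
            return query
        if kwargs.get("paginate") is False:
            return query.all()
        page = int(kwargs.get("page", 0))
        return (
            query.limit(PAGINATE_ENTRIES_PER_PAGE)
            .offset(page * PAGINATE_ENTRIES_PER_PAGE)
            .all()
        )

    return _impl


class ToyInterface:
    # Stand-in for HarvesterDBInterface, holding a session as `self.db`.
    def __init__(self, session):
        self.db = session

    @count
    @paginate
    def pget_records(self, filter=text(""), **kwargs):
        return self.db.query(Record).filter(filter)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all(
        [Record(identifier=f"test-identifier-{i}", status="error") for i in range(50)]
    )
    session.commit()

    interface = ToyInterface(session)
    # First page holds 20 rows; the second page starts at the 21st identifier.
    assert len(interface.pget_records(page=0)) == 20
    assert interface.pget_records(page=1)[0].identifier == "test-identifier-20"
    # Pagination can be bypassed entirely, or collapsed to a bare count.
    assert len(interface.pget_records(paginate=False)) == 50
    assert interface.pget_records(count=True, skip_pagination=True) == 50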
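A usage note on the command renamed in PATCH 5/5: assuming the `testdata` blueprint keeps Flask's default per-blueprint CLI group name, the fixture loader would now be invoked as `flask testdata load_test_data` (previously `flask testdata load`) to seed a local database from the JSON fixtures.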