From 8ec2043433903d116c198e11d14cf931289f7782 Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 07:29:38 +1000 Subject: [PATCH 1/8] chore: alignment, move logic into logic folder and split auth and actions apart, move plugin from folder to root plugin.py --- .gitignore | 1 + ckanext/validation/logic/__init__.py | 0 .../validation/{logic.py => logic/action.py} | 42 ++++++------------- ckanext/validation/logic/auth.py | 39 +++++++++++++++++ .../{plugin/__init__.py => plugin.py} | 33 +++------------ ckanext/validation/tests/test_interfaces.py | 8 ++-- ckanext/validation/tests/test_logic.py | 12 +++--- ckanext/validation/tests/test_plugin.py | 42 +++++++++---------- 8 files changed, 90 insertions(+), 87 deletions(-) create mode 100644 ckanext/validation/logic/__init__.py rename ckanext/validation/{logic.py => logic/action.py} (96%) create mode 100644 ckanext/validation/logic/auth.py rename ckanext/validation/{plugin/__init__.py => plugin.py} (90%) diff --git a/.gitignore b/.gitignore index 7bbc71c0..265a4a55 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,4 @@ ENV/ # mypy .mypy_cache/ +.idea diff --git a/ckanext/validation/logic/__init__.py b/ckanext/validation/logic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ckanext/validation/logic.py b/ckanext/validation/logic/action.py similarity index 96% rename from ckanext/validation/logic.py rename to ckanext/validation/logic/action.py index 66ced821..0d4f8ec8 100644 --- a/ckanext/validation/logic.py +++ b/ckanext/validation/logic/action.py @@ -26,6 +26,19 @@ log = logging.getLogger(__name__) +def get_actions(): + validators = ( + resource_validation_run, + resource_validation_show, + resource_validation_delete, + resource_validation_run_batch, + resource_create, + resource_update, + ) + + return {"{}".format(func.__name__): func for func in validators} + + def enqueue_job(*args, **kwargs): try: return t.enqueue_job(*args, **kwargs) @@ -34,35 +47,6 @@ def enqueue_job(*args, 
**kwargs): return enqueue_job_legacy(*args, **kwargs) -# Auth - -def auth_resource_validation_run(context, data_dict): - if t.check_access( - u'resource_update', context, {u'id': data_dict[u'resource_id']}): - return {u'success': True} - return {u'success': False} - - -def auth_resource_validation_delete(context, data_dict): - if t.check_access( - u'resource_update', context, {u'id': data_dict[u'resource_id']}): - return {u'success': True} - return {u'success': False} - - -@t.auth_allow_anonymous_access -def auth_resource_validation_show(context, data_dict): - if t.check_access( - u'resource_show', context, {u'id': data_dict[u'resource_id']}): - return {u'success': True} - return {u'success': False} - - -def auth_resource_validation_run_batch(context, data_dict): - '''u Sysadmins only''' - return {u'success': False} - - # Actions diff --git a/ckanext/validation/logic/auth.py b/ckanext/validation/logic/auth.py new file mode 100644 index 00000000..2139ac99 --- /dev/null +++ b/ckanext/validation/logic/auth.py @@ -0,0 +1,39 @@ +import ckan.plugins.toolkit as tk + + +def get_auth_functions(): + validators = ( + resource_validation_run, + resource_validation_delete, + resource_validation_show, + resource_validation_run_batch, + ) + + return {"{}".format(func.__name__): func for func in validators} + + +def resource_validation_run(context, data_dict): + if tk.check_access(u'resource_update', context, + {u'id': data_dict[u'resource_id']}): + return {u'success': True} + return {u'success': False} + + +def resource_validation_delete(context, data_dict): + if tk.check_access(u'resource_update', context, + {u'id': data_dict[u'resource_id']}): + return {u'success': True} + return {u'success': False} + + +@tk.auth_allow_anonymous_access +def resource_validation_show(context, data_dict): + if tk.check_access(u'resource_show', context, + {u'id': data_dict[u'resource_id']}): + return {u'success': True} + return {u'success': False} + + +def resource_validation_run_batch(context, 
data_dict): + '''u Sysadmins only''' + return {u'success': False} diff --git a/ckanext/validation/plugin/__init__.py b/ckanext/validation/plugin.py similarity index 90% rename from ckanext/validation/plugin/__init__.py rename to ckanext/validation/plugin.py index c3e8b467..ddda1ab4 100644 --- a/ckanext/validation/plugin/__init__.py +++ b/ckanext/validation/plugin.py @@ -10,14 +10,7 @@ from ckanext.validation import settings from ckanext.validation.model import tables_exist -from ckanext.validation.logic import ( - resource_validation_run, resource_validation_show, - resource_validation_delete, resource_validation_run_batch, - auth_resource_validation_run, auth_resource_validation_show, - auth_resource_validation_delete, auth_resource_validation_run_batch, - resource_create as custom_resource_create, - resource_update as custom_resource_update, -) +from .logic import action, auth from ckanext.validation.helpers import ( get_validation_badge, validation_extract_report_from_errors, @@ -75,33 +68,19 @@ def update_config(self, config_): else: log.debug(u'Validation tables exist') - t.add_template_directory(config_, u'../templates') - t.add_public_directory(config_, u'../public') - t.add_resource(u'../webassets', 'ckanext-validation') + t.add_template_directory(config_, u'templates') + t.add_public_directory(config_, u'public') + t.add_resource(u'webassets', 'ckanext-validation') # IActions def get_actions(self): - new_actions = { - u'resource_validation_run': resource_validation_run, - u'resource_validation_show': resource_validation_show, - u'resource_validation_delete': resource_validation_delete, - u'resource_validation_run_batch': resource_validation_run_batch, - u'resource_create': custom_resource_create, - u'resource_update': custom_resource_update, - } - - return new_actions + return action.get_actions() # IAuthFunctions def get_auth_functions(self): - return { - u'resource_validation_run': auth_resource_validation_run, - u'resource_validation_show': 
auth_resource_validation_show, - u'resource_validation_delete': auth_resource_validation_delete, - u'resource_validation_run_batch': auth_resource_validation_run_batch, - } + return auth.get_auth_functions() # ITemplateHelpers diff --git a/ckanext/validation/tests/test_interfaces.py b/ckanext/validation/tests/test_interfaces.py index cadca410..4da4e491 100644 --- a/ckanext/validation/tests/test_interfaces.py +++ b/ckanext/validation/tests/test_interfaces.py @@ -137,7 +137,7 @@ def test_can_validate_called_on_update_sync_no_validation(self, mock_validation) class TestInterfaceAsync(): @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', True) - @mock.patch('ckanext.validation.logic.enqueue_job') + @mock.patch('ckanext.validation.logic.action.enqueue_job') def test_can_validate_called_on_create_async(self, mock_validation): dataset = factories.Dataset() @@ -152,7 +152,7 @@ def test_can_validate_called_on_create_async(self, mock_validation): assert mock_validation.called @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', True) - @mock.patch('ckanext.validation.logic.enqueue_job') + @mock.patch('ckanext.validation.logic.action.enqueue_job') def test_can_validate_called_on_create_async_no_validation(self, mock_validation): dataset = factories.Dataset() @@ -169,7 +169,7 @@ def test_can_validate_called_on_create_async_no_validation(self, mock_validation @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', False) @pytest.mark.ckan_config('ckanext.validation.run_on_update_async', True) - @mock.patch('ckanext.validation.logic.enqueue_job') + @mock.patch('ckanext.validation.logic.action.enqueue_job') def test_can_validate_called_on_update_async(self, mock_validation): dataset = factories.Dataset() @@ -187,7 +187,7 @@ def test_can_validate_called_on_update_async(self, mock_validation): @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', False) @pytest.mark.ckan_config('ckanext.validation.run_on_update_async', True) - 
@mock.patch('ckanext.validation.logic.enqueue_job') + @mock.patch('ckanext.validation.logic.action.enqueue_job') def test_can_validate_called_on_update_async_no_validation(self, mock_validation): dataset = factories.Dataset() diff --git a/ckanext/validation/tests/test_logic.py b/ckanext/validation/tests/test_logic.py index 1271a152..bbdfbb86 100644 --- a/ckanext/validation/tests/test_logic.py +++ b/ckanext/validation/tests/test_logic.py @@ -59,14 +59,14 @@ def test_resource_validation_no_url_or_upload(self): assert "Resource must have a valid URL" in str(e) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_with_url(self, mock_enqueue_job): resource = factories.Resource(url="http://example.com", format="csv") call_action("resource_validation_run", resource_id=resource["id"]) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_with_upload(self, mock_enqueue_job): resource = factories.Resource(url="", url_type="upload", format="csv") @@ -85,7 +85,7 @@ def test_resource_validation_run_starts_job(self): assert len(jobs_after) == len(jobs) + 1 - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_creates_validation_object(self, mock_enqueue_job): resource = factories.Resource(format="csv") @@ -106,7 +106,7 @@ def test_resource_validation_creates_validation_object(self, mock_enqueue_job): assert validation.error is None @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_resets_existing_validation_object( self, mock_enqueue_job ): @@ -145,7 +145,7 @@ def test_resource_validation_resets_existing_validation_object( assert validation.report 
is None assert validation.error is None - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_only_called_on_resource_created( self, mock_enqueue_job ): @@ -170,7 +170,7 @@ def test_resource_validation_only_called_on_resource_created( assert mock_enqueue_job.call_count == 1 assert mock_enqueue_job.call_args[0][1][0]["id"] == resource2["id"] - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_resource_validation_only_called_on_resource_updated( self, mock_enqueue_job ): diff --git a/ckanext/validation/tests/test_plugin.py b/ckanext/validation/tests/test_plugin.py index 866db6c0..ac6f368e 100644 --- a/ckanext/validation/tests/test_plugin.py +++ b/ckanext/validation/tests/test_plugin.py @@ -12,7 +12,7 @@ class TestResourceControllerHooksUpdate(object): @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_on_other_fields(self, mock_enqueue): resource = {"format": "CSV"} @@ -26,7 +26,7 @@ def test_validation_does_not_run_on_other_fields(self, mock_enqueue): mock_enqueue.assert_not_called() @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_on_other_formats(self, mock_enqueue): resource = {"format": "PDF"} @@ -38,7 +38,7 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue): mock_enqueue.assert_not_called() @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_on_upload(self, mock_enqueue): resource = 
{"format": "CSV", "upload": "mock_upload", "url_type": "upload"} @@ -53,7 +53,7 @@ def test_validation_run_on_upload(self, mock_enqueue): assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"] @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_on_url_change(self, mock_enqueue): resource = {"format": "CSV", "url": "https://some.url"} @@ -70,7 +70,7 @@ def test_validation_run_on_url_change(self, mock_enqueue): assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"] @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_on_schema_change(self, mock_enqueue): resource = { @@ -93,7 +93,7 @@ def test_validation_run_on_schema_change(self, mock_enqueue): assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"] @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_on_format_change(self, mock_enqueue): resource = factories.Resource() @@ -109,7 +109,7 @@ def test_validation_run_on_format_change(self, mock_enqueue): @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_when_config_false(self, mock_enqueue): resource = factories.Resource(format="CSV") @@ -124,14 +124,14 @@ def test_validation_does_not_run_when_config_false(self, mock_enqueue): @pytest.mark.usefixtures("clean_db", "validation_setup", 
"with_plugins") class TestResourceControllerHooksCreate(object): - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_on_other_formats(self, mock_enqueue): factories.Resource(format="PDF") mock_enqueue.assert_not_called() - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False) def test_validation_run_with_upload(self, mock_enqueue): @@ -142,7 +142,7 @@ def test_validation_run_with_upload(self, mock_enqueue): assert mock_enqueue.call_args[0][0] == run_validation_job assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"] - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False) def test_validation_run_with_url(self, mock_enqueue): @@ -155,7 +155,7 @@ def test_validation_run_with_url(self, mock_enqueue): @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_when_config_false(self, mock_enqueue): dataset = factories.Dataset() @@ -174,7 +174,7 @@ def test_validation_does_not_run_when_config_false(self, mock_enqueue): @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins") class TestPackageControllerHooksCreate(object): - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_on_other_formats(self, mock_enqueue): factories.Dataset(resources=[{"format": "PDF"}]) @@ -182,14 +182,14 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue): 
mock_enqueue.assert_not_called() @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_when_config_false(self, mock_enqueue): factories.Dataset(resources=[{"format": "CSV", "url": "http://some.data"}]) mock_enqueue.assert_not_called() - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_with_upload(self, mock_enqueue): resource = {"id": "test-resource-id", "format": "CSV", "url_type": "upload"} @@ -200,7 +200,7 @@ def test_validation_run_with_upload(self, mock_enqueue): assert mock_enqueue.call_args[0][0] == run_validation_job assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"] - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_with_url(self, mock_enqueue): resource = { @@ -215,7 +215,7 @@ def test_validation_run_with_url(self, mock_enqueue): assert mock_enqueue.call_args[0][0] == run_validation_job assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"] - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_only_supported_formats(self, mock_enqueue): resource1 = { @@ -241,7 +241,7 @@ def test_validation_run_only_supported_formats(self, mock_enqueue): class TestPackageControllerHooksUpdate(object): @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_runs_with_url(self, mock_enqueue): resource = { @@ -263,7 +263,7 @@ def test_validation_runs_with_url(self, mock_enqueue): assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"] 
@pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_runs_with_upload(self, mock_enqueue): resource = {"id": "test-resource-id", "format": "CSV", "url_type": "upload"} @@ -281,7 +281,7 @@ def test_validation_runs_with_upload(self, mock_enqueue): assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"] @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_on_other_formats(self, mock_enqueue): resource = {"id": "test-resource-id", "format": "PDF", "url": "http://some.doc"} @@ -296,7 +296,7 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue): mock_enqueue.assert_not_called() @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_run_only_supported_formats(self, mock_enqueue): resource1 = { @@ -325,7 +325,7 @@ def test_validation_run_only_supported_formats(self, mock_enqueue): @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False) @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False) - @mock.patch("ckanext.validation.logic.enqueue_job") + @mock.patch("ckanext.validation.logic.action.enqueue_job") def test_validation_does_not_run_when_config_false(self, mock_enqueue): resource = { From 85509428dda23c57f7684e9c01133e35251e285d Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 08:38:21 +1000 Subject: [PATCH 2/8] chore: Correct flask blueprint and cleanup imports by using get_commands in sections classes --- MANIFEST.in | 4 +- ckanext/validation/blueprints.py | 46 ------------------- ckanext/validation/cli.py | 4 ++ 
ckanext/validation/common.py | 56 +++++++++++++++++++++++ ckanext/validation/helpers.py | 12 +++++ ckanext/validation/plugin.py | 76 +++++++++++++++++++++----------- ckanext/validation/validators.py | 9 ++++ ckanext/validation/views.py | 44 +++--------------- 8 files changed, 140 insertions(+), 111 deletions(-) delete mode 100644 ckanext/validation/blueprints.py create mode 100644 ckanext/validation/common.py diff --git a/MANIFEST.in b/MANIFEST.in index cbb4ca17..1e3a411a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -include README.rst +include README.md include LICENSE include requirements.txt -recursive-include ckanext/validation *.html *.json *.js *.less *.css *.mo \ No newline at end of file +recursive-include ckanext/validation *.html *.json *.js *.less *.css *.mo *.config *.yml \ No newline at end of file diff --git a/ckanext/validation/blueprints.py b/ckanext/validation/blueprints.py deleted file mode 100644 index 3ec0dc34..00000000 --- a/ckanext/validation/blueprints.py +++ /dev/null @@ -1,46 +0,0 @@ -# encoding: utf-8 - -from flask import Blueprint - -from ckantoolkit import c, NotAuthorized, ObjectNotFound, abort, _, render, get_action - -validation = Blueprint("validation", __name__) - - -def read(id, resource_id): - - try: - validation = get_action(u"resource_validation_show")( - {u"user": c.user}, {u"resource_id": resource_id} - ) - - resource = get_action(u"resource_show")({u"user": c.user}, {u"id": resource_id}) - - dataset = get_action(u"package_show")( - {u"user": c.user}, {u"id": resource[u"package_id"]} - ) - - # Needed for core resource templates - c.package = c.pkg_dict = dataset - c.resource = resource - - return render( - u"validation/validation_read.html", - extra_vars={ - u"validation": validation, - u"resource": resource, - u"dataset": dataset, - u"pkg_dict": dataset, - }, - ) - - except NotAuthorized: - abort(403, _(u"Unauthorized to read this validation report")) - except ObjectNotFound: - - abort(404, _(u"No validation 
report exists for this resource")) - - -validation.add_url_rule( - "/dataset//resource//validation", view_func=read -) diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py index 3b8a0570..3159840a 100644 --- a/ckanext/validation/cli.py +++ b/ckanext/validation/cli.py @@ -5,6 +5,10 @@ from ckanext.validation.model import create_tables, tables_exist +def get_commands(): + return [validation] + + @click.group() def validation(): """Harvests remotely mastered metadata.""" diff --git a/ckanext/validation/common.py b/ckanext/validation/common.py new file mode 100644 index 00000000..cd4a284e --- /dev/null +++ b/ckanext/validation/common.py @@ -0,0 +1,56 @@ +# encoding: utf-8 + +import csv +import logging +import six +import sys + +from ckantoolkit import (c, NotAuthorized, + ObjectNotFound, abort, _, + render, get_action, config) + +from ckanext.validation import settings +from ckanext.validation.logic.action import _search_datasets +from ckanext.validation.model import create_tables, tables_exist + + +log = logging.getLogger(__name__) + +############################################################################### +# Controller # +############################################################################### + + +def validation(resource_id, id=None): + try: + validation = get_action(u'resource_validation_show')( + {u'user': c.user}, + {u'resource_id': resource_id}) + + resource = get_action(u'resource_show')( + {u'user': c.user}, + {u'id': resource_id}) + + package_id = resource[u'package_id'] + if id and id != package_id: + raise ObjectNotFound("Resource {} not found in package {}".format(resource_id, id)) + + dataset = get_action(u'package_show')( + {u'user': c.user}, + {u'id': id or resource[u'package_id']}) + + # Needed for core resource templates + c.package = c.pkg_dict = dataset + c.resource = resource + + return render(u'validation/validation_read.html', extra_vars={ + u'validation': validation, + u'resource': resource, + u'pkg_dict': dataset, + 
u'dataset': dataset, + }) + + except NotAuthorized: + return abort(403, _(u'Unauthorized to read this validation report')) + except ObjectNotFound: + return abort(404, _(u'No validation report exists for this resource')) \ No newline at end of file diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py index b6c856df..aa36572b 100644 --- a/ckanext/validation/helpers.py +++ b/ckanext/validation/helpers.py @@ -4,6 +4,18 @@ from ckan.lib.helpers import url_for_static from ckantoolkit import url_for, _, config, asbool, literal, h +def _get_helpers(): + validators = ( + get_validation_badge, + validation_extract_report_from_errors, + dump_json_value, + bootstrap_version, + validation_dict, + use_webassets + ) + + return {"{}".format(func.__name__): func for func in validators} + def get_validation_badge(resource, in_listing=False): diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py index ddda1ab4..b9ae0f77 100644 --- a/ckanext/validation/plugin.py +++ b/ckanext/validation/plugin.py @@ -1,14 +1,16 @@ # encoding: utf-8 +import json import logging import cgi -import json + from werkzeug.datastructures import FileStorage as FlaskFileStorage import ckan.plugins as p import ckantoolkit as t -from ckanext.validation import settings +from . 
import settings, validators +from .helpers import _get_helpers from ckanext.validation.model import tables_exist from .logic import action, auth from ckanext.validation.helpers import ( @@ -28,8 +30,7 @@ get_update_mode_from_config, ) from ckanext.validation.interfaces import IDataValidation -from ckanext.validation import blueprints, cli - +from ckanext.validation import views, cli ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) log = logging.getLogger(__name__) @@ -49,22 +50,23 @@ class ValidationPlugin(p.SingletonPlugin): # IBlueprint def get_blueprint(self): - return [blueprints.validation] + return views.get_blueprints() # IClick def get_commands(self): - return [cli.validation] + return cli.get_commands() # IConfigurer def update_config(self, config_): if not tables_exist(): + init_command = 'ckan validation init-db' log.critical(u''' -The validation extension requires a database setup. Please run the following -to create the database tables: - paster --plugin=ckanext-validation validation init-db -''') +The validation extension requires a database setup. +Validation pages will not be enabled. 
+Please run the following to create the database tables: + %s''', init_command) else: log.debug(u'Validation tables exist') @@ -85,14 +87,12 @@ def get_auth_functions(self): # ITemplateHelpers def get_helpers(self): - return { - u'get_validation_badge': get_validation_badge, - u'validation_extract_report_from_errors': validation_extract_report_from_errors, - u'dump_json_value': dump_json_value, - u'bootstrap_version': bootstrap_version, - u'validation_dict': validation_dict, - u'use_webassets': use_webassets, - } + return _get_helpers() + + # IValidators + + def get_validators(self): + return validators.get_validators() # IResourceController @@ -132,14 +132,27 @@ def _process_schema_fields(self, data_dict): return data_dict + # CKAN < 2.10 def before_create(self, context, data_dict): + return self.before_resource_create(context, data_dict) + + # CKAN >= 2.10 + def before_resource_create(self, context, data_dict): is_dataset = self._data_dict_is_dataset(data_dict) if not is_dataset: context["_resource_create_call"] = True return self._process_schema_fields(data_dict) + # CKAN < 2.10 def after_create(self, context, data_dict): + # if (self._data_dict_is_dataset(data_dict)): + # return self.after_dataset_create(context, data_dict) + # else: + return self.after_resource_create(context, data_dict) + + # CKAN >= 2.10 + def after_resource_create(self, context, data_dict): is_dataset = self._data_dict_is_dataset(data_dict) @@ -185,7 +198,12 @@ def _handle_validation_for_resource(self, context, resource): _run_async_validation(resource[u'id']) + # CKAN < 2.10 def before_update(self, context, current_resource, updated_resource): + return self.before_resource_update(context, current_resource, updated_resource) + + # CKAN >= 2.10 + def before_resource_update(self, context, current_resource, updated_resource): updated_resource = self._process_schema_fields(updated_resource) @@ -225,7 +243,15 @@ def before_update(self, context, current_resource, updated_resource): return 
updated_resource + # CKAN < 2.10 def after_update(self, context, data_dict): + # if (self._data_dict_is_dataset(data_dict)): + # return self.after_dataset_update(context, data_dict) + # else: + return self.after_resource_update(context, data_dict) + + # CKAN >= 2.10 + def after_resource_update(self, context, data_dict): is_dataset = self._data_dict_is_dataset(data_dict) @@ -287,7 +313,13 @@ def after_update(self, context, data_dict): # IPackageController + # CKAN < 2.10 def before_index(self, index_dict): + if (self._data_dict_is_dataset(index_dict)): + return self.before_dataset_index(index_dict) + + # CKAN >= 2.10 + def before_dataset_index(self, index_dict): res_status = [] dataset_dict = json.loads(index_dict['validated_data_dict']) @@ -300,14 +332,6 @@ def before_index(self, index_dict): return index_dict - # IValidators - - def get_validators(self): - return { - 'resource_schema_validator': resource_schema_validator, - 'validation_options_validator': validation_options_validator, - } - def _run_async_validation(resource_id): diff --git a/ckanext/validation/validators.py b/ckanext/validation/validators.py index 9e1962a8..da7c081a 100644 --- a/ckanext/validation/validators.py +++ b/ckanext/validation/validators.py @@ -6,6 +6,15 @@ from ckantoolkit import Invalid, config +def get_validators(): + validators = ( + resource_schema_validator, + validation_options_validator, + ) + + return {"{}".format(func.__name__): func for func in validators} + + # Input validators def resource_schema_validator(value, context): diff --git a/ckanext/validation/views.py b/ckanext/validation/views.py index c17f44ee..8e91962a 100644 --- a/ckanext/validation/views.py +++ b/ckanext/validation/views.py @@ -2,46 +2,16 @@ from flask import Blueprint -from ckantoolkit import ( - c, NotAuthorized, ObjectNotFound, - abort, _, render, get_action) +from ckanext.validation import common -validation = Blueprint("service_proxy", __name__) +validation = Blueprint(u'validation', __name__) -def 
validation_read(self, id, resource_id): - - try: - validation = get_action(u'resource_validation_show')( - {u'user': c.user}, - {u'resource_id': resource_id}) - - resource = get_action(u'resource_show')( - {u'user': c.user}, - {u'id': resource_id}) - - dataset = get_action(u'package_show')( - {u'user': c.user}, - {u'id': resource[u'package_id']}) - - # Needed for core resource templates - c.package = c.pkg_dict = dataset - c.resource = resource - - return render(u'validation/validation_read.html', extra_vars={ - u'validation': validation, - u'resource': resource, - u'dataset': dataset, - }) - - except NotAuthorized: - abort(403, _(u'Unauthorized to read this validation report')) - except ObjectNotFound: - - abort(404, _(u'No validation report exists for this resource')) - validation.add_url_rule( - '/dataset/{id}/resource/{resource_id}/validation', - view_func=validation_read + u'/dataset//resource//validation', 'read', methods=('GET',), view_func=common.validation ) + + +def get_blueprints(): + return [validation] From 8755a7801149eb62d280dfd42dcc1c5d55608762 Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 13:24:59 +1000 Subject: [PATCH 3/8] chore: move commands to common and cross referfence in cli also --- ckanext/validation/cli.py | 81 +++++++++- ckanext/validation/commands.py | 267 ++------------------------------- ckanext/validation/common.py | 262 +++++++++++++++++++++++++++++++- 3 files changed, 344 insertions(+), 266 deletions(-) diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py index 3159840a..5a68627d 100644 --- a/ckanext/validation/cli.py +++ b/ckanext/validation/cli.py @@ -2,7 +2,7 @@ import click -from ckanext.validation.model import create_tables, tables_exist +from ckanext.validation import common def get_commands(): @@ -17,10 +17,77 @@ def validation(): @validation.command() def init_db(): - """Creates the necessary tables in the database.""" - if tables_exist(): - print(u"Validation tables already exist") - 
sys.exit(0) + """ Initialize database tables. + """ + common.init_db() - create_tables() - print(u"Validation tables created") + +@validation.command(name='run') +@click.option(u'-y', u'--yes', + help=u'Automatic yes to prompts. Assume "yes" as answer ' + u'to all prompts and run non-interactively', + default=False) +@click.option('-r', '--resource', + multiple=True, + help=u'Run data validation on a particular resource (if the format is suitable).' + u'It can be defined multiple times. Not to be used with -d or -s') +@click.option('-d', '--dataset', + multiple=True, + help=u'Run data validation on all resources for a particular dataset (if the format is suitable).' + u' You can use the dataset id or name, and it can be defined multiple times. ' + u'Not to be used with -r or -s') +@click.option('-s', '--search', + default=False, + help=u'Extra search parameters that will be used for getting the datasets to run ' + u'validation on. It must be a JSON object like the one used by the `package_search` API call.' + u' Supported fields are `q`, `fq` and `fq_list`. Check the documentation for examples. ' + u'Note that when using this you will have to specify the resource formats to target yourself.' + u' Not to be used with -r or -d.') +def run_validation(yes, resource, dataset, search): + '''Start asynchronous data validation on the site resources. If no + options are provided it will run validation on all resources of + the supported formats (`ckanext.validation.formats`). You can + specify particular datasets to run the validation on their + resources. You can also pass arbitrary search parameters to filter + the selected datasets. + ''' + common.run_validation(yes, resource, dataset, search) + + +@validation.command() +@click.option(u'-o', u'--output', + help=u'Location of the CSV validation report file on the relevant commands.', + default=u'validation_errors_report.csv') +def report(output): + '''Generate a report with all current data validation reports. 
This + will print an overview of the total number of tabular resources + and a breakdown of how many have a validation status of success, + failure or error. Additionally it will create a CSV report with all + failing resources, including the following fields: + * Dataset name + * Resource id + * Resource URL + * Status + * Validation report URL + ''' + common.report(output) + + +@validation.command(name='report-full') +@click.option(u'-o', u'--output', + help=u'Location of the CSV validation report file on the relevant commands.', + default=u'validation_errors_report.csv') +def report_full(output): + '''Generate a detailed report. This is similar to 'report' + but on the CSV report it will add a row for each error found on the + validation report (limited to ten occurrences of the same error + type per file). So the fields in the generated CSV report will be: + + * Dataset name + * Resource id + * Resource URL + * Status + * Error code + * Error message + ''' + common.report(output, full=True) diff --git a/ckanext/validation/commands.py b/ckanext/validation/commands.py index 4cb1ba69..04505bb9 100644 --- a/ckanext/validation/commands.py +++ b/ckanext/validation/commands.py @@ -1,25 +1,10 @@ # encoding: utf-8 import sys -import logging -import csv -from ckan.lib.cli import query_yes_no -from ckantoolkit import CkanCommand, get_action, config +from ckantoolkit import CkanCommand -from ckanext.validation import settings -from ckanext.validation.model import create_tables, tables_exist -from ckanext.validation.logic import _search_datasets - - -def error(msg): - ''' - Print an error message to STDOUT and exit with return code 1. 
- ''' - sys.stderr.write(msg) - if not msg.endswith('\n'): - sys.stderr.write('\n') - sys.exit(1) +from ckanext.validation import common class Validation(CkanCommand): @@ -111,9 +96,6 @@ def __init__(self, name): help='''Location of the CSV validation report file on the relevant commands.''') - - _page_size = 100 - def command(self): self._load_config() @@ -137,249 +119,18 @@ def command(self): sys.exit(1) def init_db(self): - - if tables_exist(): - print(u'Validation tables already exist') - sys.exit(0) - - create_tables() - - print(u'Validation tables created') + common.init_db() def run_validation(self): - if self.options.resource_id: - for resource_id in self.options.resource_id: - resource = get_action('resource_show')({}, {'id': resource_id}) - self._run_validation_on_resource( - resource['id'], resource['package_id']) - else: - - query = _search_datasets() - - if query['count'] == 0: - error('No suitable datasets, exiting...') - - elif not self.options.assume_yes: - - msg = ('\nYou are about to start validation for {0} datasets' + - '.\n Do you want to continue?') - - confirm = query_yes_no(msg.format(query['count'])) - - if confirm == 'no': - error('Command aborted by user') - - result = get_action('resource_validation_run_batch')( - {'ignore_auth': True}, - {'dataset_ids': self.options.dataset_id, - 'query': self.options.search_params} - ) - print(result['output']) - - def _run_validation_on_resource(self, resource_id, dataset_id): - - log = logging.getLogger(__name__) - - get_action(u'resource_validation_run')( - {u'ignore_auth': True}, - {u'resource_id': resource_id, - u'async': True}) - - msg = ('Resource {} from dataset {} sent to ' + - 'the validation queue') - - log.debug( - msg.format(resource_id, dataset_id)) - - def _process_row(self, dataset, resource, writer): - resource_url = '{}/dataset/{}/resource/{}'.format( - config['ckan.site_url'], - dataset['name'], - resource['id']) - - validation_url = resource_url + '/validation' - - writer.writerow({ 
- 'dataset': dataset['name'], - 'resource_id': resource['id'], - 'format': resource['format'], - 'url': resource_url, - 'status': resource['validation_status'], - 'validation_report_url': validation_url - }) - - return - - def _process_row_full(self, dataset, resource, writer): - - limit_per_error_type = 10 - - error_counts = {} - - resource_url = '{}/dataset/{}/resource/{}'.format( - config['ckan.site_url'], - dataset['name'], - resource['id']) - - # Get validation report - validation = get_action('resource_validation_show')( - {'ignore_auth': True}, {'resource_id': resource['id']}) - - if not validation.get('report'): - return + assume_yes = self.options.assume_yes + resource_ids = self.options.resource_id + dataset_ids = self.options.dataset_id + query = self.options.search_params - errors = validation['report']['tables'][0]['errors'] - - for error in errors: - if not error['code'] in error_counts: - error_counts[error['code']] = 1 - else: - error_counts[error['code']] += 1 - - if error_counts[error['code']] > limit_per_error_type: - continue - - writer.writerow({ - 'dataset': dataset['name'], - 'resource_id': resource['id'], - 'format': resource['format'], - 'url': resource_url, - 'status': resource['validation_status'], - 'error_code': error['code'], - 'error_message': error['message'] - }) - - return error_counts + common.run_validation(assume_yes, resource_ids, dataset_ids, query) def report(self, full=False): - log = logging.getLogger(__name__) - output_csv = self.options.output_file - if output_csv == 'validation_errors_report.csv' and full: - output_csv = 'validation_errors_report_full.csv' - - outputs = { - 'tabular_resources': 0, - 'resources_failure': 0, - 'resources_error': 0, - 'resources_success': 0, - 'datasets': 0, - 'formats_success': {}, - 'formats_failure': {} - } - error_counts = {} - - with open(output_csv, 'w') as fw: - if full: - fieldnames = [ - 'dataset', 'resource_id', 'format', 'url', - 'status', 'error_code', 'error_message'] - else: - 
fieldnames = [ - 'dataset', 'resource_id', 'format', 'url', - 'status', 'validation_report_url'] - - writer = csv.DictWriter(fw, fieldnames=fieldnames) - writer.writeheader() - - page = 1 - while True: - query = _search_datasets(page) - - if page == 1 and query['count'] == 0: - error('No suitable datasets, exiting...') - - if query['results']: - for dataset in query['results']: - - if not dataset.get('resources'): - continue - - for resource in dataset['resources']: - - if (not resource['format'].lower() in - settings.DEFAULT_SUPPORTED_FORMATS): - continue - - outputs['tabular_resources'] += 1 - - if resource.get('validation_status'): - outputs['resources_' + resource['validation_status']] += 1 - - if resource.get('validation_status') in ( - 'failure', 'error'): - if full: - row_counts = self._process_row_full(dataset, resource, writer) - if not row_counts: - continue - for code, count in row_counts.iteritems(): - if code not in error_counts: - error_counts[code] = count - else: - error_counts[code] += count - else: - self._process_row(dataset, resource, writer) - - if resource['format'] in outputs['formats_failure']: - outputs['formats_failure'][resource['format']] += 1 - else: - outputs['formats_failure'][resource['format']] = 1 - else: - if resource['format'] in outputs['formats_success']: - outputs['formats_success'][resource['format']] += 1 - else: - outputs['formats_success'][resource['format']] = 1 - - - if len(query['results']) < self._page_size: - break - - page += 1 - else: - break - - outputs['datasets'] = query['count'] - outputs['output_csv'] = output_csv - - outputs['formats_success_output'] = '' - for count, code in sorted([(v, k) for k, v in outputs['formats_success'].iteritems()], reverse=True): - outputs['formats_success_output'] += '* {}: {}\n'.format(code, count) - - outputs['formats_failure_output'] = '' - for count, code in sorted([(v, k) for k, v in outputs['formats_failure'].iteritems()], reverse=True): - outputs['formats_failure_output'] += 
'* {}: {}\n'.format(code, count) - - error_counts_output = '' - if full: - for count, code in sorted([(v, k) for k, v in error_counts.iteritems()], reverse=True): - error_counts_output += '* {}: {}\n'.format(code, count) - - outputs['error_counts_output'] = error_counts_output - - msg_errors = ''' -Errors breakdown: -{} -'''.format(outputs['error_counts_output']) - - outputs['msg_errors'] = msg_errors if full else '' - - msg = ''' -Done. -{datasets} datasets with tabular resources -{tabular_resources} tabular resources -{resources_success} resources - validation success -{resources_failure} resources - validation failure -{resources_error} resources - validation error - -Formats breakdown (validation passed): -{formats_success_output} -Formats breakdown (validation failed or errored): -{formats_failure_output} -{msg_errors} -CSV Report stored in {output_csv} -'''.format(**outputs) - - - log.info(msg) + common.report(output_csv, full) diff --git a/ckanext/validation/common.py b/ckanext/validation/common.py index cd4a284e..fd01205c 100644 --- a/ckanext/validation/common.py +++ b/ckanext/validation/common.py @@ -53,4 +53,264 @@ def validation(resource_id, id=None): except NotAuthorized: return abort(403, _(u'Unauthorized to read this validation report')) except ObjectNotFound: - return abort(404, _(u'No validation report exists for this resource')) \ No newline at end of file + return abort(404, _(u'No validation report exists for this resource')) + + +############################################################################### +# CLI # +############################################################################### + + +def user_confirm(msg): + import click + return click.confirm(msg) + + +def error(msg): + ''' + Print an error message to STDERR and exit with return code 1. 
+ ''' + sys.stderr.write(msg) + if not msg.endswith('\n'): + sys.stderr.write('\n') + sys.exit(1) + + +def init_db(): + if tables_exist(): + print(u'Validation tables already exist') + sys.exit(0) + create_tables() + print(u'Validation tables created') + + +def run_validation(assume_yes, resource_ids, dataset_ids, search_params): + + if resource_ids: + for resource_id in resource_ids: + resource = get_action('resource_show')({}, {'id': resource_id}) + _run_validation_on_resource( + resource['id'], resource['package_id']) + else: + + query = _search_datasets() + + if query['count'] == 0: + error('No suitable datasets, exiting...') + + elif not assume_yes: + msg = ('\nYou are about to start validation for {0} datasets' + '.\n Do you want to continue?') + + if not user_confirm(msg.format(query['count'])): + error('Command aborted by user') + + result = get_action('resource_validation_run_batch')( + {'ignore_auth': True}, + {'dataset_ids': dataset_ids, + 'query': search_params} + ) + print(result['output']) + + +def _run_validation_on_resource(resource_id, dataset_id): + + get_action(u'resource_validation_run')( + {u'ignore_auth': True}, + {u'resource_id': resource_id, + u'async': True}) + + log.debug('Resource %s from dataset %s sent to the validation queue', + resource_id, dataset_id) + + +def _process_row(dataset, resource, writer): + resource_url = '{}/dataset/{}/resource/{}'.format( + config['ckan.site_url'], + dataset['name'], + resource['id']) + + validation_url = resource_url + '/validation' + + writer.writerow({ + 'dataset': dataset['name'], + 'resource_id': resource['id'], + 'format': resource['format'], + 'url': resource_url, + 'status': resource['validation_status'], + 'validation_report_url': validation_url + }) + + return + + +def _process_row_full(dataset, resource, writer): + + limit_per_error_type = 10 + + error_counts = {} + + resource_url = '{}/dataset/{}/resource/{}'.format( + config['ckan.site_url'], + dataset['name'], + resource['id']) + + # Get 
validation report + validation = get_action('resource_validation_show')( + {'ignore_auth': True}, {'resource_id': resource['id']}) + + if not validation.get('report'): + return + + errors = validation['report']['tables'][0]['errors'] + + for error in errors: + if not error['code'] in error_counts: + error_counts[error['code']] = 1 + else: + error_counts[error['code']] += 1 + + if error_counts[error['code']] > limit_per_error_type: + continue + + writer.writerow({ + 'dataset': dataset['name'], + 'resource_id': resource['id'], + 'format': resource['format'], + 'url': resource_url, + 'status': resource['validation_status'], + 'error_code': error['code'], + 'error_message': error['message'] + }) + + return error_counts + + +def report(output_csv, full=False): + + _page_size = 100 + + if output_csv == 'validation_errors_report.csv' and full: + output_csv = 'validation_errors_report_full.csv' + + outputs = { + 'tabular_resources': 0, + 'resources_failure': 0, + 'resources_error': 0, + 'resources_success': 0, + 'datasets': 0, + 'formats_success': {}, + 'formats_failure': {} + } + error_counts = {} + + with open(output_csv, 'w') as fw: + if full: + fieldnames = [ + 'dataset', 'resource_id', 'format', 'url', + 'status', 'error_code', 'error_message'] + else: + fieldnames = [ + 'dataset', 'resource_id', 'format', 'url', + 'status', 'validation_report_url'] + + writer = csv.DictWriter(fw, fieldnames=fieldnames) + writer.writeheader() + + page = 1 + while True: + query = _search_datasets(page) + + if page == 1 and query['count'] == 0: + error('No suitable datasets, exiting...') + + if query['results']: + for dataset in query['results']: + + if not dataset.get('resources'): + continue + + for resource in dataset['resources']: + + if (not resource['format'].lower() in + settings.DEFAULT_SUPPORTED_FORMATS): + continue + + outputs['tabular_resources'] += 1 + + if resource.get('validation_status'): + outputs['resources_' + resource['validation_status']] += 1 + + if 
resource.get('validation_status') in ( + 'failure', 'error'): + if full: + row_counts = _process_row_full(dataset, resource, writer) + if not row_counts: + continue + for code, count in six.iteritems(row_counts): + if code not in error_counts: + error_counts[code] = count + else: + error_counts[code] += count + else: + _process_row(dataset, resource, writer) + + if resource['format'] in outputs['formats_failure']: + outputs['formats_failure'][resource['format']] += 1 + else: + outputs['formats_failure'][resource['format']] = 1 + else: + if resource['format'] in outputs['formats_success']: + outputs['formats_success'][resource['format']] += 1 + else: + outputs['formats_success'][resource['format']] = 1 + + if len(query['results']) < _page_size: + break + + page += 1 + else: + break + + outputs['datasets'] = query['count'] + outputs['output_csv'] = output_csv + + outputs['formats_success_output'] = '' + for count, code in sorted([(v, k) for k, v in six.iteritems(outputs['formats_success'])], reverse=True): + outputs['formats_success_output'] += '* {}: {}\n'.format(code, count) + + outputs['formats_failure_output'] = '' + for count, code in sorted([(v, k) for k, v in six.iteritems(outputs['formats_failure'])], reverse=True): + outputs['formats_failure_output'] += '* {}: {}\n'.format(code, count) + + error_counts_output = '' + if full: + for count, code in sorted([(v, k) for k, v in six.iteritems(error_counts)], reverse=True): + error_counts_output += '* {}: {}\n'.format(code, count) + + outputs['error_counts_output'] = error_counts_output + + msg_errors = ''' + Errors breakdown: + {} + '''.format(outputs['error_counts_output']) + + outputs['msg_errors'] = msg_errors if full else '' + + msg = ''' + Done. 
+ {datasets} datasets with tabular resources + {tabular_resources} tabular resources + {resources_success} resources - validation success + {resources_failure} resources - validation failure + {resources_error} resources - validation error + + Formats breakdown (validation passed): + {formats_success_output} + Formats breakdown (validation failed or errored): + {formats_failure_output} + {msg_errors} + CSV Report stored in {output_csv} + '''.format(**outputs) + + log.info(msg) From 22692ed4065a12a1f43c4a11a379680e436b0f28 Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 13:31:05 +1000 Subject: [PATCH 4/8] chore: controller uses common function --- ckanext/validation/cli.py | 7 ++++--- ckanext/validation/controller.py | 36 ++++---------------------------- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py index 5a68627d..290dcbf9 100644 --- a/ckanext/validation/cli.py +++ b/ckanext/validation/cli.py @@ -1,4 +1,4 @@ -import sys +# encoding: utf-8 import click @@ -11,11 +11,12 @@ def get_commands(): @click.group() def validation(): - """Harvests remotely mastered metadata.""" + """Validation management commands. + """ pass -@validation.command() +@validation.command(name='init-db') def init_db(): """ Initialize database tables. 
""" diff --git a/ckanext/validation/controller.py b/ckanext/validation/controller.py index 91224649..b4396a21 100644 --- a/ckanext/validation/controller.py +++ b/ckanext/validation/controller.py @@ -1,39 +1,11 @@ # encoding: utf-8 -from ckantoolkit import ( - BaseController, c, NotAuthorized, ObjectNotFound, - abort, _, render, get_action) +from ckantoolkit import BaseController + +from ckanext.validation import common class ValidationController(BaseController): def validation(self, resource_id): - - try: - validation = get_action(u'resource_validation_show')( - {u'user': c.user}, - {u'resource_id': resource_id}) - - resource = get_action(u'resource_show')( - {u'user': c.user}, - {u'id': resource_id}) - - dataset = get_action(u'package_show')( - {u'user': c.user}, - {u'id': resource[u'package_id']}) - - # Needed for core resource templates - c.package = c.pkg_dict = dataset - c.resource = resource - - return render(u'validation/validation_read.html', extra_vars={ - u'validation': validation, - u'resource': resource, - u'dataset': dataset, - }) - - except NotAuthorized: - abort(403, _(u'Unauthorized to read this validation report')) - except ObjectNotFound: - - abort(404, _(u'No validation report exists for this resource')) + return common.validation(resource_id) From f001c86f493615c645d8b781c12bdadfcb2717cb Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 13:32:16 +1000 Subject: [PATCH 5/8] chore: update model --- ckanext/validation/model.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ckanext/validation/model.py b/ckanext/validation/model.py index 52841c06..9e82b7f7 100644 --- a/ckanext/validation/model.py +++ b/ckanext/validation/model.py @@ -7,6 +7,7 @@ from sqlalchemy import Column, Unicode, DateTime from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.dialects.postgresql import JSON +from six import text_type from ckan.model.meta import metadata @@ -14,7 +15,7 @@ def make_uuid(): - 
return str(uuid.uuid4()) + return text_type(uuid.uuid4()) Base = declarative_base(metadata=metadata) @@ -25,10 +26,20 @@ class Validation(Base): id = Column(Unicode, primary_key=True, default=make_uuid) resource_id = Column(Unicode) + # status can be one of these values: + # created: Job created and put onto queue + # running: Job picked up by worker and being processed + # success: Validation Successful and report attached + # failure: Validation Failed and report attached + # error: Validation Job could not create validation report status = Column(Unicode, default=u'created') + # created is when job was added created = Column(DateTime, default=datetime.datetime.utcnow) + # finished is when report was generated, is None when new or restarted finished = Column(DateTime) + # json object of report, can be None report = Column(JSON) + # json object of error, can be None error = Column(JSON) From 080855574430ca1bccef48e6e11b124ef43f5208 Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 13:48:56 +1000 Subject: [PATCH 6/8] chore: move functions out of plugin into utils --- ckanext/validation/plugin.py | 116 +++-------------------------------- ckanext/validation/utils.py | 106 +++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 107 deletions(-) diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py index b9ae0f77..a47648ce 100644 --- a/ckanext/validation/plugin.py +++ b/ckanext/validation/plugin.py @@ -2,29 +2,17 @@ import json import logging -import cgi -from werkzeug.datastructures import FileStorage as FlaskFileStorage + import ckan.plugins as p import ckantoolkit as t -from . import settings, validators +from . 
import settings, utils, validators from .helpers import _get_helpers from ckanext.validation.model import tables_exist from .logic import action, auth -from ckanext.validation.helpers import ( - get_validation_badge, - validation_extract_report_from_errors, - dump_json_value, - bootstrap_version, - validation_dict, - use_webassets, -) -from ckanext.validation.validators import ( - resource_schema_validator, - validation_options_validator, -) + from ckanext.validation.utils import ( get_create_mode_from_config, get_update_mode_from_config, @@ -32,7 +20,7 @@ from ckanext.validation.interfaces import IDataValidation from ckanext.validation import views, cli -ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) + log = logging.getLogger(__name__) @@ -99,38 +87,6 @@ def get_validators(self): resources_to_validate = {} packages_to_skip = {} - def _process_schema_fields(self, data_dict): - u''' - Normalize the different ways of providing the `schema` field - - 1. If `schema_upload` is provided and it's a valid file, the contents - are read into `schema`. - 2. If `schema_url` is provided and looks like a valid URL, it's copied - to `schema` - 3. If `schema_json` is provided, it's copied to `schema`. - - All the 3 `schema_*` fields are removed from the data_dict. 
- Note that the data_dict still needs to pass validation - ''' - - schema_upload = data_dict.pop(u'schema_upload', None) - schema_url = data_dict.pop(u'schema_url', None) - schema_json = data_dict.pop(u'schema_json', None) - if isinstance(schema_upload, ALLOWED_UPLOAD_TYPES): - uploaded_file = _get_underlying_file(schema_upload) - data_dict[u'schema'] = uploaded_file.read() - if isinstance(data_dict["schema"], (bytes, bytearray)): - data_dict["schema"] = data_dict["schema"].decode() - elif schema_url: - - if (not isinstance(schema_url, str) or - not schema_url.lower()[:4] == u'http'): - raise t.ValidationError({u'schema_url': 'Must be a valid URL'}) - data_dict[u'schema'] = schema_url - elif schema_json: - data_dict[u'schema'] = schema_json - - return data_dict # CKAN < 2.10 def before_create(self, context, data_dict): @@ -142,7 +98,7 @@ def before_resource_create(self, context, data_dict): is_dataset = self._data_dict_is_dataset(data_dict) if not is_dataset: context["_resource_create_call"] = True - return self._process_schema_fields(data_dict) + return utils.process_schema_fields(data_dict) # CKAN < 2.10 def after_create(self, context, data_dict): @@ -196,7 +152,7 @@ def _handle_validation_for_resource(self, context, resource): log.debug('Skipping validation for resource %s', resource['id']) return - _run_async_validation(resource[u'id']) + utils._run_async_validation(resource[u'id']) # CKAN < 2.10 def before_update(self, context, current_resource, updated_resource): @@ -205,7 +161,7 @@ def before_update(self, context, current_resource, updated_resource): # CKAN >= 2.10 def before_resource_update(self, context, current_resource, updated_resource): - updated_resource = self._process_schema_fields(updated_resource) + updated_resource = utils.process_schema_fields(updated_resource) # the call originates from a resource API, so don't validate the entire package package_id = updated_resource.get('package_id') @@ -306,10 +262,10 @@ def after_resource_update(self, 
context, data_dict): del self.resources_to_validate[resource_id] - _run_async_validation(resource_id) + utils._run_async_validation(resource_id) - if _should_remove_unsupported_resource_validation_reports(data_dict): - p.toolkit.enqueue_job(fn=_remove_unsupported_resource_validation_reports, args=[resource_id]) + if utils._should_remove_unsupported_resource_validation_reports(data_dict): + p.toolkit.enqueue_job(fn=utils._remove_unsupported_resource_validation_reports, args=[resource_id]) # IPackageController @@ -332,55 +288,3 @@ def before_dataset_index(self, index_dict): return index_dict - -def _run_async_validation(resource_id): - - try: - t.get_action(u'resource_validation_run')( - {u'ignore_auth': True}, - {u'resource_id': resource_id, - u'async': True}) - except t.ValidationError as e: - log.warning( - u'Could not run validation for resource %s: %s', - resource_id, e) - -def _get_underlying_file(wrapper): - if isinstance(wrapper, FlaskFileStorage): - return wrapper.stream - return wrapper.file - - -def _should_remove_unsupported_resource_validation_reports(res_dict): - if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)): - return False - return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS - and (res_dict.get('url_type') == 'upload' - or not res_dict.get('url_type')) - and (t.h.asbool(res_dict.get('validation_status', False)) - or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) - - -def _remove_unsupported_resource_validation_reports(resource_id): - """ - Callback to remove unsupported validation reports. - Controlled by config value: ckanext.validation.clean_validation_reports. - Double check the resource format. Only supported Validation formats should have validation reports. - If the resource format is not supported, we should delete the validation reports. 
- """ - context = {"ignore_auth": True} - try: - res = p.toolkit.get_action('resource_show')(context, {"id": resource_id}) - except t.ObjectNotFound: - log.error('Resource %s does not exist.', resource_id) - return - - if _should_remove_unsupported_resource_validation_reports(res): - log.info('Unsupported resource format "%s". Deleting validation reports for resource %s', - res.get(u'format', u''), res['id']) - try: - p.toolkit.get_action('resource_validation_delete')(context, { - "resource_id": res['id']}) - log.info('Validation reports deleted for resource %s', res['id']) - except t.ObjectNotFound: - log.error('Validation reports for resource %s do not exist', res['id']) diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py index 6c16d77f..f5cdb9fe 100644 --- a/ckanext/validation/utils.py +++ b/ckanext/validation/utils.py @@ -1,5 +1,6 @@ import os import logging +import cgi from ckan.plugins import PluginImplementations from ckan.lib.uploader import ResourceUpload @@ -7,9 +8,112 @@ from ckanext.validation.interfaces import IPipeValidation - log = logging.getLogger(__name__) +from . import settings +import ckan.plugins as p +import ckantoolkit as t + +from werkzeug.datastructures import FileStorage as FlaskFileStorage +ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) + +def process_schema_fields(data_dict): + u''' + Normalize the different ways of providing the `schema` field + + 1. If `schema_upload` is provided and it's a valid file, the contents + are read into `schema`. + 2. If `schema_url` is provided and looks like a valid URL, it's copied + to `schema` + 3. If `schema_json` is provided, it's copied to `schema`. + + All the 3 `schema_*` fields are removed from the data_dict. 
+ Note that the data_dict still needs to pass validation + ''' + + schema_upload = data_dict.pop(u'schema_upload', None) + schema_url = data_dict.pop(u'schema_url', None) + schema_json = data_dict.pop(u'schema_json', None) + + if isinstance(schema_upload, ALLOWED_UPLOAD_TYPES): + uploaded_file = _get_underlying_file(schema_upload) + data_dict[u'schema'] = uploaded_file.read() + if isinstance(data_dict["schema"], (bytes, bytearray)): + data_dict["schema"] = data_dict["schema"].decode() + elif schema_url: + + if (not isinstance(schema_url, str) or + not schema_url.lower()[:4] == u'http'): + raise t.ValidationError({u'schema_url': 'Must be a valid URL'}) + data_dict[u'schema'] = schema_url + elif schema_json: + data_dict[u'schema'] = schema_json + + return data_dict + +def _get_underlying_file(wrapper): + if isinstance(wrapper, FlaskFileStorage): + return wrapper.stream + return wrapper.file + + +def _should_remove_unsupported_resource_validation_reports(res_dict): + if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)): + return False + return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS + and (res_dict.get('url_type') == 'upload' + or not res_dict.get('url_type')) + and (t.h.asbool(res_dict.get('validation_status', False)) + or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) + + +def _run_async_validation(resource_id): + + try: + t.get_action(u'resource_validation_run')( + {u'ignore_auth': True}, + {u'resource_id': resource_id, + u'async': True}) + except t.ValidationError as e: + log.warning( + u'Could not run validation for resource %s: %s', + resource_id, e) + + +def _should_remove_unsupported_resource_validation_reports(res_dict): + if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)): + return False + return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS + and (res_dict.get('url_type') == 'upload' + or not 
res_dict.get('url_type')) + and (t.h.asbool(res_dict.get('validation_status', False)) + or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) + + +def _remove_unsupported_resource_validation_reports(resource_id): + """ + Callback to remove unsupported validation reports. + Controlled by config value: ckanext.validation.clean_validation_reports. + Double check the resource format. Only supported Validation formats should have validation reports. + If the resource format is not supported, we should delete the validation reports. + """ + context = {"ignore_auth": True} + try: + res = p.toolkit.get_action('resource_show')(context, {"id": resource_id}) + except t.ObjectNotFound: + log.error('Resource %s does not exist.', resource_id) + return + + if _should_remove_unsupported_resource_validation_reports(res): + log.info('Unsupported resource format "%s". Deleting validation reports for resource %s', + res.get(u'format', u''), res['id']) + try: + p.toolkit.get_action('resource_validation_delete')(context, { + "resource_id": res['id']}) + log.info('Validation reports deleted for resource %s', res['id']) + except t.ObjectNotFound: + log.error('Validation reports for resource %s do not exist', res['id']) + def get_update_mode_from_config(): if asbool( From b9c04fd0726857501649e9ab1d2594aa9c139a5e Mon Sep 17 00:00:00 2001 From: William Dutton Date: Wed, 11 Dec 2024 14:52:32 +1000 Subject: [PATCH 7/8] chore: cleanup --- ckanext/validation/jobs.py | 87 +++++++++++++++++------------------- ckanext/validation/plugin.py | 19 ++++---- ckanext/validation/utils.py | 10 ----- 3 files changed, 51 insertions(+), 65 deletions(-) diff --git a/ckanext/validation/jobs.py b/ckanext/validation/jobs.py index f0d658ec..74726eb6 100644 --- a/ckanext/validation/jobs.py +++ b/ckanext/validation/jobs.py @@ -8,18 +8,15 @@ import requests from sqlalchemy.orm.exc import NoResultFound from frictionless import validate, system, Report, Schema, Dialect, Check +from six import 
string_types from ckan.model import Session import ckan.lib.uploader as uploader import ckantoolkit as t -from ckanext.validation.model import Validation -from ckanext.validation.utils import ( - get_update_mode_from_config, - send_validation_report, - validation_dictize, -) +from .model import Validation +from . import utils log = logging.getLogger(__name__) @@ -27,7 +24,17 @@ def run_validation_job(resource): - log.debug('Validating resource %s', resource['id']) + # handle either a resource dict or just an ID + # ID is more efficient, as resource dicts can be very large + if isinstance(resource, string_types): + log.debug(u'run_validation_job: calling resource_show: %s', resource) + resource = t.get_action('resource_show')({'ignore_auth': True}, {'id': resource}) + + resource_id = resource.get('id') + if resource_id: + log.debug(u'Validating resource: %s', resource_id) + else: + log.debug(u'Validating resource dict: %s', resource) try: validation = Session.query(Validation).filter( @@ -59,37 +66,38 @@ def run_validation_job(resource): {'ignore_auth': True}, {'id': resource['package_id']}) source = None - if resource.get('url_type') == 'upload': + if resource.get(u'url_type') == u'upload': upload = uploader.get_resource_uploader(resource) if isinstance(upload, uploader.ResourceUpload): - source = upload.get_path(resource['id']) + source = upload.get_path(resource[u'id']) else: # Upload is not the default implementation (ie it's a cloud storage # implementation) pass_auth_header = t.asbool( - t.config.get('ckanext.validation.pass_auth_header', True)) - if dataset['private'] and pass_auth_header: + t.config.get(u'ckanext.validation.pass_auth_header', True)) + if dataset[u'private'] and pass_auth_header: s = requests.Session() s.headers.update({ - 'Authorization': t.config.get( - 'ckanext.validation.pass_auth_header_value', - _get_site_user_api_key()) + u'Authorization': t.config.get( + u'ckanext.validation.pass_auth_header_value', + utils.get_site_user_api_key()) }) 
- options['http_session'] = s + options[u'http_session'] = s if not source: - source = resource['url'] - - schema = resource.get('schema') - if schema: - if isinstance(schema, str): - if schema.startswith('http'): - r = requests.get(schema) - schema = r.json() + source = resource[u'url'] + + schema = resource.get(u'schema') + if schema and isinstance(schema, string_types): + if schema.startswith('http'): + r = requests.get(schema) + schema = r.json() + else: schema = json.loads(schema) - _format = resource['format'].lower() + _format = resource[u'format'].lower() + report = _validate_table(source, _format=_format, schema=schema, **options) # Hide uploaded files @@ -127,30 +135,27 @@ def run_validation_job(resource): 'validation_timestamp': validation.finished.isoformat(), } - if get_update_mode_from_config() == 'sync': + if utils.get_update_mode_from_config() == 'sync': data_dict['_skip_next_validation'] = True, - patch_context = { - 'ignore_auth': True, + t.get_action('resource_patch')( + {'ignore_auth': True, 'user': t.get_action('get_site_user')({'ignore_auth': True})['name'], - '_validation_performed': True - } - t.get_action('resource_patch')(patch_context, data_dict) - send_validation_report(validation_dictize(validation)) - + '_validation_performed': True}, + data_dict) + utils.send_validation_report(utils.validation_dictize(validation)) - -def _validate_table(source, _format='csv', schema=None, **options): +def _validate_table(source, _format=u'csv', schema=None, **options): # This option is needed to allow Frictionless Framework to validate absolute paths frictionless_context = { 'trusted': True } http_session = options.pop('http_session', None) or requests.Session() - use_proxy = 'ckan.download_proxy' in t.config + use_proxy = 'ckan.download_proxy' in t.config if use_proxy: proxy = t.config.get('ckan.download_proxy') - log.debug('Download resource for validation via proxy: %s', proxy) + log.debug(u'Download resource for validation via proxy: %s', proxy) 
http_session.proxies.update({'http': proxy, 'https': proxy}) frictionless_context['http_session'] = http_session @@ -168,14 +173,6 @@ def _validate_table(source, _format='csv', schema=None, **options): with system.use_context(**frictionless_context): report = validate(source, format=_format, schema=resource_schema, **options) - log.debug('Validating source: %s', source) + log.debug(u'Validating source: %s', source) return report - - -def _get_site_user_api_key(): - - site_user_name = t.get_action('get_site_user')({'ignore_auth': True}, {}) - site_user = t.get_action('get_site_user')( - {'ignore_auth': True}, {'id': site_user_name}) - return site_user['apikey'] diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py index a47648ce..2a92e4bc 100644 --- a/ckanext/validation/plugin.py +++ b/ckanext/validation/plugin.py @@ -6,12 +6,12 @@ import ckan.plugins as p -import ckantoolkit as t +import ckantoolkit as tk -from . import settings, utils, validators +from . import settings as s, utils, validators from .helpers import _get_helpers -from ckanext.validation.model import tables_exist from .logic import action, auth +from .model import tables_exist from ckanext.validation.utils import ( get_create_mode_from_config, @@ -58,9 +58,9 @@ def update_config(self, config_): else: log.debug(u'Validation tables exist') - t.add_template_directory(config_, u'templates') - t.add_public_directory(config_, u'public') - t.add_resource(u'webassets', 'ckanext-validation') + tk.add_template_directory(config_, u'templates') + tk.add_public_directory(config_, u'public') + tk.add_resource(u'webassets', 'ckanext-validation') # IActions @@ -141,7 +141,7 @@ def _handle_validation_for_resource(self, context, resource): ) and ( # Make sure format is supported resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS + s.SUPPORTED_FORMATS )): needs_validation = True @@ -166,7 +166,7 @@ def before_resource_update(self, context, current_resource, updated_resource): # the 
call originates from a resource API, so don't validate the entire package package_id = updated_resource.get('package_id') if not package_id: - existing_resource = t.get_action('resource_show')( + existing_resource = tk.get_action('resource_show')( context={'ignore_auth': True}, data_dict={'id': updated_resource['id']}) if existing_resource: package_id = existing_resource['package_id'] @@ -190,7 +190,7 @@ def before_resource_update(self, context, current_resource, updated_resource): ) and ( # Make sure format is supported updated_resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS + s.SUPPORTED_FORMATS )): needs_validation = True @@ -287,4 +287,3 @@ def before_dataset_index(self, index_dict): index_dict['vocab_validation_status'] = res_status return index_dict - diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py index f5cdb9fe..4c0881a7 100644 --- a/ckanext/validation/utils.py +++ b/ckanext/validation/utils.py @@ -80,16 +80,6 @@ def _run_async_validation(resource_id): resource_id, e) -def _should_remove_unsupported_resource_validation_reports(res_dict): - if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)): - return False - return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS - and (res_dict.get('url_type') == 'upload' - or not res_dict.get('url_type')) - and (t.h.asbool(res_dict.get('validation_status', False)) - or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) - - def _remove_unsupported_resource_validation_reports(resource_id): """ Callback to remove unsupported validation reports. 
From 47476bd9ea7eb9573874de07214fdd8e03411c56 Mon Sep 17 00:00:00 2001 From: William Dutton Date: Thu, 12 Dec 2024 08:16:48 +1000 Subject: [PATCH 8/8] chore: rename helpers._get_helpers() to get_helpers() --- ckanext/validation/helpers.py | 2 +- ckanext/validation/plugin.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py index aa36572b..5192ac89 100644 --- a/ckanext/validation/helpers.py +++ b/ckanext/validation/helpers.py @@ -4,7 +4,7 @@ from ckan.lib.helpers import url_for_static from ckantoolkit import url_for, _, config, asbool, literal, h -def _get_helpers(): +def get_helpers(): validators = ( get_validation_badge, validation_extract_report_from_errors, diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py index 2a92e4bc..af5fd081 100644 --- a/ckanext/validation/plugin.py +++ b/ckanext/validation/plugin.py @@ -9,7 +9,7 @@ import ckantoolkit as tk from . import settings as s, utils, validators -from .helpers import _get_helpers +from .helpers import get_helpers from .logic import action, auth from .model import tables_exist @@ -75,7 +75,7 @@ def get_auth_functions(self): # ITemplateHelpers def get_helpers(self): - return _get_helpers() + return get_helpers() # IValidators