From 8ec2043433903d116c198e11d14cf931289f7782 Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 07:29:38 +1000
Subject: [PATCH 1/8] chore: alignment, move logic into logic folder and split
 auth and actions apart, move plugin from folder to root plugin.py

---
 .gitignore                                    |  1 +
 ckanext/validation/logic/__init__.py          |  0
 .../validation/{logic.py => logic/action.py}  | 42 ++++++-------------
 ckanext/validation/logic/auth.py              | 39 +++++++++++++++++
 .../{plugin/__init__.py => plugin.py}         | 33 +++------------
 ckanext/validation/tests/test_interfaces.py   |  8 ++--
 ckanext/validation/tests/test_logic.py        | 12 +++---
 ckanext/validation/tests/test_plugin.py       | 42 +++++++++----------
 8 files changed, 90 insertions(+), 87 deletions(-)
 create mode 100644 ckanext/validation/logic/__init__.py
 rename ckanext/validation/{logic.py => logic/action.py} (96%)
 create mode 100644 ckanext/validation/logic/auth.py
 rename ckanext/validation/{plugin/__init__.py => plugin.py} (90%)

diff --git a/.gitignore b/.gitignore
index 7bbc71c0..265a4a55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,4 @@ ENV/
 
 # mypy
 .mypy_cache/
+.idea
diff --git a/ckanext/validation/logic/__init__.py b/ckanext/validation/logic/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ckanext/validation/logic.py b/ckanext/validation/logic/action.py
similarity index 96%
rename from ckanext/validation/logic.py
rename to ckanext/validation/logic/action.py
index 66ced821..0d4f8ec8 100644
--- a/ckanext/validation/logic.py
+++ b/ckanext/validation/logic/action.py
@@ -26,6 +26,19 @@
 log = logging.getLogger(__name__)
 
 
+def get_actions():
+    '''Return this module's action functions keyed by function name.'''
+    actions = (
+        resource_validation_run,
+        resource_validation_show,
+        resource_validation_delete,
+        resource_validation_run_batch,
+        resource_create,
+        resource_update,
+    )
+    return {func.__name__: func for func in actions}
+
+
 def enqueue_job(*args, **kwargs):
     try:
         return t.enqueue_job(*args, **kwargs)
@@ -34,35 +47,6 @@ def enqueue_job(*args, **kwargs):
         return enqueue_job_legacy(*args, **kwargs)
 
 
-# Auth
-
-def auth_resource_validation_run(context, data_dict):
-    if t.check_access(
-            u'resource_update', context, {u'id': data_dict[u'resource_id']}):
-        return {u'success': True}
-    return {u'success': False}
-
-
-def auth_resource_validation_delete(context, data_dict):
-    if t.check_access(
-            u'resource_update', context, {u'id': data_dict[u'resource_id']}):
-        return {u'success': True}
-    return {u'success': False}
-
-
-@t.auth_allow_anonymous_access
-def auth_resource_validation_show(context, data_dict):
-    if t.check_access(
-            u'resource_show', context, {u'id': data_dict[u'resource_id']}):
-        return {u'success': True}
-    return {u'success': False}
-
-
-def auth_resource_validation_run_batch(context, data_dict):
-    '''u Sysadmins only'''
-    return {u'success': False}
-
-
 # Actions
 
 
diff --git a/ckanext/validation/logic/auth.py b/ckanext/validation/logic/auth.py
new file mode 100644
index 00000000..2139ac99
--- /dev/null
+++ b/ckanext/validation/logic/auth.py
@@ -0,0 +1,39 @@
+import ckan.plugins.toolkit as tk
+
+
+def get_auth_functions():
+    '''Return this module's auth functions keyed by function name.'''
+    auth_functions = (
+        resource_validation_run,
+        resource_validation_delete,
+        resource_validation_show,
+        resource_validation_run_batch,
+    )
+    return {func.__name__: func for func in auth_functions}
+
+
+def resource_validation_run(context, data_dict):
+    '''Authorize via resource_update on data_dict's resource_id.'''
+    resource_ref = {u'id': data_dict[u'resource_id']}
+    allowed = tk.check_access(u'resource_update', context, resource_ref)
+    return {u'success': bool(allowed)}
+
+
+def resource_validation_delete(context, data_dict):
+    '''Authorize via resource_update on data_dict's resource_id.'''
+    resource_ref = {u'id': data_dict[u'resource_id']}
+    allowed = tk.check_access(u'resource_update', context, resource_ref)
+    return {u'success': bool(allowed)}
+
+
+@tk.auth_allow_anonymous_access
+def resource_validation_show(context, data_dict):
+    '''Authorize via resource_show on data_dict's resource_id.'''
+    resource_ref = {u'id': data_dict[u'resource_id']}
+    allowed = tk.check_access(u'resource_show', context, resource_ref)
+    return {u'success': bool(allowed)}
+
+
+def resource_validation_run_batch(context, data_dict):
+    '''Sysadmins only: always denies (presumably sysadmins bypass auth).'''
+    return {u'success': False}
diff --git a/ckanext/validation/plugin/__init__.py b/ckanext/validation/plugin.py
similarity index 90%
rename from ckanext/validation/plugin/__init__.py
rename to ckanext/validation/plugin.py
index c3e8b467..ddda1ab4 100644
--- a/ckanext/validation/plugin/__init__.py
+++ b/ckanext/validation/plugin.py
@@ -10,14 +10,7 @@
 
 from ckanext.validation import settings
 from ckanext.validation.model import tables_exist
-from ckanext.validation.logic import (
-    resource_validation_run, resource_validation_show,
-    resource_validation_delete, resource_validation_run_batch,
-    auth_resource_validation_run, auth_resource_validation_show,
-    auth_resource_validation_delete, auth_resource_validation_run_batch,
-    resource_create as custom_resource_create,
-    resource_update as custom_resource_update,
-)
+from .logic import action, auth
 from ckanext.validation.helpers import (
     get_validation_badge,
     validation_extract_report_from_errors,
@@ -75,33 +68,19 @@ def update_config(self, config_):
         else:
             log.debug(u'Validation tables exist')
 
-        t.add_template_directory(config_, u'../templates')
-        t.add_public_directory(config_, u'../public')
-        t.add_resource(u'../webassets', 'ckanext-validation')
+        t.add_template_directory(config_, u'templates')
+        t.add_public_directory(config_, u'public')
+        t.add_resource(u'webassets', 'ckanext-validation')
 
     # IActions
 
     def get_actions(self):
-        new_actions = {
-            u'resource_validation_run': resource_validation_run,
-            u'resource_validation_show': resource_validation_show,
-            u'resource_validation_delete': resource_validation_delete,
-            u'resource_validation_run_batch': resource_validation_run_batch,
-            u'resource_create': custom_resource_create,
-            u'resource_update': custom_resource_update,
-        }
-
-        return new_actions
+        return action.get_actions()
 
     # IAuthFunctions
 
     def get_auth_functions(self):
-        return {
-            u'resource_validation_run': auth_resource_validation_run,
-            u'resource_validation_show': auth_resource_validation_show,
-            u'resource_validation_delete': auth_resource_validation_delete,
-            u'resource_validation_run_batch': auth_resource_validation_run_batch,
-        }
+        return auth.get_auth_functions()
 
     # ITemplateHelpers
 
diff --git a/ckanext/validation/tests/test_interfaces.py b/ckanext/validation/tests/test_interfaces.py
index cadca410..4da4e491 100644
--- a/ckanext/validation/tests/test_interfaces.py
+++ b/ckanext/validation/tests/test_interfaces.py
@@ -137,7 +137,7 @@ def test_can_validate_called_on_update_sync_no_validation(self, mock_validation)
 class TestInterfaceAsync():
 
     @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', True)
-    @mock.patch('ckanext.validation.logic.enqueue_job')
+    @mock.patch('ckanext.validation.logic.action.enqueue_job')
     def test_can_validate_called_on_create_async(self, mock_validation):
 
         dataset = factories.Dataset()
@@ -152,7 +152,7 @@ def test_can_validate_called_on_create_async(self, mock_validation):
         assert mock_validation.called
 
     @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', True)
-    @mock.patch('ckanext.validation.logic.enqueue_job')
+    @mock.patch('ckanext.validation.logic.action.enqueue_job')
     def test_can_validate_called_on_create_async_no_validation(self, mock_validation):
 
         dataset = factories.Dataset()
@@ -169,7 +169,7 @@ def test_can_validate_called_on_create_async_no_validation(self, mock_validation
 
     @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', False)
     @pytest.mark.ckan_config('ckanext.validation.run_on_update_async', True)
-    @mock.patch('ckanext.validation.logic.enqueue_job')
+    @mock.patch('ckanext.validation.logic.action.enqueue_job')
     def test_can_validate_called_on_update_async(self, mock_validation):
 
         dataset = factories.Dataset()
@@ -187,7 +187,7 @@ def test_can_validate_called_on_update_async(self, mock_validation):
 
     @pytest.mark.ckan_config('ckanext.validation.run_on_create_async', False)
     @pytest.mark.ckan_config('ckanext.validation.run_on_update_async', True)
-    @mock.patch('ckanext.validation.logic.enqueue_job')
+    @mock.patch('ckanext.validation.logic.action.enqueue_job')
     def test_can_validate_called_on_update_async_no_validation(self, mock_validation):
 
         dataset = factories.Dataset()
diff --git a/ckanext/validation/tests/test_logic.py b/ckanext/validation/tests/test_logic.py
index 1271a152..bbdfbb86 100644
--- a/ckanext/validation/tests/test_logic.py
+++ b/ckanext/validation/tests/test_logic.py
@@ -59,14 +59,14 @@ def test_resource_validation_no_url_or_upload(self):
 
         assert "Resource must have a valid URL" in str(e)
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_with_url(self, mock_enqueue_job):
 
         resource = factories.Resource(url="http://example.com", format="csv")
 
         call_action("resource_validation_run", resource_id=resource["id"])
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_with_upload(self, mock_enqueue_job):
 
         resource = factories.Resource(url="", url_type="upload", format="csv")
@@ -85,7 +85,7 @@ def test_resource_validation_run_starts_job(self):
 
         assert len(jobs_after) == len(jobs) + 1
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_creates_validation_object(self, mock_enqueue_job):
 
         resource = factories.Resource(format="csv")
@@ -106,7 +106,7 @@ def test_resource_validation_creates_validation_object(self, mock_enqueue_job):
         assert validation.error is None
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_resets_existing_validation_object(
         self, mock_enqueue_job
     ):
@@ -145,7 +145,7 @@ def test_resource_validation_resets_existing_validation_object(
         assert validation.report is None
         assert validation.error is None
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_only_called_on_resource_created(
         self, mock_enqueue_job
     ):
@@ -170,7 +170,7 @@ def test_resource_validation_only_called_on_resource_created(
         assert mock_enqueue_job.call_count == 1
         assert mock_enqueue_job.call_args[0][1][0]["id"] == resource2["id"]
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_resource_validation_only_called_on_resource_updated(
         self, mock_enqueue_job
     ):
diff --git a/ckanext/validation/tests/test_plugin.py b/ckanext/validation/tests/test_plugin.py
index 866db6c0..ac6f368e 100644
--- a/ckanext/validation/tests/test_plugin.py
+++ b/ckanext/validation/tests/test_plugin.py
@@ -12,7 +12,7 @@
 class TestResourceControllerHooksUpdate(object):
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_on_other_fields(self, mock_enqueue):
 
         resource = {"format": "CSV"}
@@ -26,7 +26,7 @@ def test_validation_does_not_run_on_other_fields(self, mock_enqueue):
         mock_enqueue.assert_not_called()
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
 
         resource = {"format": "PDF"}
@@ -38,7 +38,7 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
         mock_enqueue.assert_not_called()
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_on_upload(self, mock_enqueue):
 
         resource = {"format": "CSV", "upload": "mock_upload", "url_type": "upload"}
@@ -53,7 +53,7 @@ def test_validation_run_on_upload(self, mock_enqueue):
         assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"]
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_on_url_change(self, mock_enqueue):
 
         resource = {"format": "CSV", "url": "https://some.url"}
@@ -70,7 +70,7 @@ def test_validation_run_on_url_change(self, mock_enqueue):
         assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"]
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_on_schema_change(self, mock_enqueue):
 
         resource = {
@@ -93,7 +93,7 @@ def test_validation_run_on_schema_change(self, mock_enqueue):
         assert mock_enqueue.call_args[0][1][0]["id"] == dataset["resources"][0]["id"]
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_on_format_change(self, mock_enqueue):
 
         resource = factories.Resource()
@@ -109,7 +109,7 @@ def test_validation_run_on_format_change(self, mock_enqueue):
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
     @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 
         resource = factories.Resource(format="CSV")
@@ -124,14 +124,14 @@ def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins")
 class TestResourceControllerHooksCreate(object):
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
 
         factories.Resource(format="PDF")
 
         mock_enqueue.assert_not_called()
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False)
     def test_validation_run_with_upload(self, mock_enqueue):
 
@@ -142,7 +142,7 @@ def test_validation_run_with_upload(self, mock_enqueue):
         assert mock_enqueue.call_args[0][0] == run_validation_job
         assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"]
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False)
     def test_validation_run_with_url(self, mock_enqueue):
 
@@ -155,7 +155,7 @@ def test_validation_run_with_url(self, mock_enqueue):
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
     @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 
         dataset = factories.Dataset()
@@ -174,7 +174,7 @@ def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins")
 class TestPackageControllerHooksCreate(object):
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
 
         factories.Dataset(resources=[{"format": "PDF"}])
@@ -182,14 +182,14 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
         mock_enqueue.assert_not_called()
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 
         factories.Dataset(resources=[{"format": "CSV", "url": "http://some.data"}])
 
         mock_enqueue.assert_not_called()
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_with_upload(self, mock_enqueue):
 
         resource = {"id": "test-resource-id", "format": "CSV", "url_type": "upload"}
@@ -200,7 +200,7 @@ def test_validation_run_with_upload(self, mock_enqueue):
         assert mock_enqueue.call_args[0][0] == run_validation_job
         assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"]
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_with_url(self, mock_enqueue):
 
         resource = {
@@ -215,7 +215,7 @@ def test_validation_run_with_url(self, mock_enqueue):
         assert mock_enqueue.call_args[0][0] == run_validation_job
         assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"]
 
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_only_supported_formats(self, mock_enqueue):
 
         resource1 = {
@@ -241,7 +241,7 @@ def test_validation_run_only_supported_formats(self, mock_enqueue):
 class TestPackageControllerHooksUpdate(object):
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_runs_with_url(self, mock_enqueue):
 
         resource = {
@@ -263,7 +263,7 @@ def test_validation_runs_with_url(self, mock_enqueue):
         assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"]
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_runs_with_upload(self, mock_enqueue):
 
         resource = {"id": "test-resource-id", "format": "CSV", "url_type": "upload"}
@@ -281,7 +281,7 @@ def test_validation_runs_with_upload(self, mock_enqueue):
         assert mock_enqueue.call_args[0][1][0]["id"] == resource["id"]
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
 
         resource = {"id": "test-resource-id", "format": "PDF", "url": "http://some.doc"}
@@ -296,7 +296,7 @@ def test_validation_does_not_run_on_other_formats(self, mock_enqueue):
         mock_enqueue.assert_not_called()
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_run_only_supported_formats(self, mock_enqueue):
 
         resource1 = {
@@ -325,7 +325,7 @@ def test_validation_run_only_supported_formats(self, mock_enqueue):
 
     @pytest.mark.ckan_config("ckanext.validation.run_on_create_async", False)
     @pytest.mark.ckan_config("ckanext.validation.run_on_update_async", False)
-    @mock.patch("ckanext.validation.logic.enqueue_job")
+    @mock.patch("ckanext.validation.logic.action.enqueue_job")
     def test_validation_does_not_run_when_config_false(self, mock_enqueue):
 
         resource = {

From 85509428dda23c57f7684e9c01133e35251e285d Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 08:38:21 +1000
Subject: [PATCH 2/8] chore: Correct flask blueprint and clean up imports by
 delegating plugin hooks to per-module get_* helper functions

---
 MANIFEST.in                      |  4 +-
 ckanext/validation/blueprints.py | 46 -------------------
 ckanext/validation/cli.py        |  4 ++
 ckanext/validation/common.py     | 56 +++++++++++++++++++++++
 ckanext/validation/helpers.py    | 12 +++++
 ckanext/validation/plugin.py     | 76 +++++++++++++++++++++-----------
 ckanext/validation/validators.py |  9 ++++
 ckanext/validation/views.py      | 44 +++---------------
 8 files changed, 140 insertions(+), 111 deletions(-)
 delete mode 100644 ckanext/validation/blueprints.py
 create mode 100644 ckanext/validation/common.py

diff --git a/MANIFEST.in b/MANIFEST.in
index cbb4ca17..1e3a411a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
-include README.rst
+include README.md
 include LICENSE
 include requirements.txt
-recursive-include ckanext/validation *.html *.json *.js *.less *.css *.mo
\ No newline at end of file
+recursive-include ckanext/validation *.html *.json *.js *.less *.css *.mo *.config *.yml
\ No newline at end of file
diff --git a/ckanext/validation/blueprints.py b/ckanext/validation/blueprints.py
deleted file mode 100644
index 3ec0dc34..00000000
--- a/ckanext/validation/blueprints.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# encoding: utf-8
-
-from flask import Blueprint
-
-from ckantoolkit import c, NotAuthorized, ObjectNotFound, abort, _, render, get_action
-
-validation = Blueprint("validation", __name__)
-
-
-def read(id, resource_id):
-
-    try:
-        validation = get_action(u"resource_validation_show")(
-            {u"user": c.user}, {u"resource_id": resource_id}
-        )
-
-        resource = get_action(u"resource_show")({u"user": c.user}, {u"id": resource_id})
-
-        dataset = get_action(u"package_show")(
-            {u"user": c.user}, {u"id": resource[u"package_id"]}
-        )
-
-        # Needed for core resource templates
-        c.package = c.pkg_dict = dataset
-        c.resource = resource
-
-        return render(
-            u"validation/validation_read.html",
-            extra_vars={
-                u"validation": validation,
-                u"resource": resource,
-                u"dataset": dataset,
-                u"pkg_dict": dataset,
-            },
-        )
-
-    except NotAuthorized:
-        abort(403, _(u"Unauthorized to read this validation report"))
-    except ObjectNotFound:
-
-        abort(404, _(u"No validation report exists for this resource"))
-
-
-validation.add_url_rule(
-    "/dataset/<id>/resource/<resource_id>/validation", view_func=read
-)
diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py
index 3b8a0570..3159840a 100644
--- a/ckanext/validation/cli.py
+++ b/ckanext/validation/cli.py
@@ -5,6 +5,10 @@
 from ckanext.validation.model import create_tables, tables_exist
 
 
+def get_commands():  # list of click commands this module exposes
+    return [validation]  # the `validation` click group defined below
+
+
 @click.group()
 def validation():
     """Harvests remotely mastered metadata."""
diff --git a/ckanext/validation/common.py b/ckanext/validation/common.py
new file mode 100644
index 00000000..cd4a284e
--- /dev/null
+++ b/ckanext/validation/common.py
@@ -0,0 +1,56 @@
+# encoding: utf-8
+
+import csv
+import logging
+import six
+import sys
+
+from ckantoolkit import (c, NotAuthorized,
+                         ObjectNotFound, abort, _,
+                         render, get_action, config)
+
+from ckanext.validation import settings
+from ckanext.validation.logic.action import _search_datasets
+from ckanext.validation.model import create_tables, tables_exist
+
+
+log = logging.getLogger(__name__)
+
+###############################################################################
+#                                  Controller                                 #
+###############################################################################
+
+
+def validation(resource_id, id=None):
+    '''Render the validation report page for a resource.
+
+    Aborts with 403 on NotAuthorized and with 404 on ObjectNotFound; the
+    latter is also raised here when ``id`` differs from the package_id.
+    '''
+    def call_action(name, data_dict):
+        # Every call gets its own context dict built from c.user.
+        return get_action(name)({u'user': c.user}, data_dict)
+
+    try:
+        report = call_action(
+            u'resource_validation_show', {u'resource_id': resource_id})
+        resource = call_action(u'resource_show', {u'id': resource_id})
+        package_id = resource[u'package_id']
+        if id and id != package_id:
+            raise ObjectNotFound(
+                "Resource {} not found in package {}".format(resource_id, id))
+
+        dataset = call_action(u'package_show', {u'id': id or package_id})
+
+        # Needed for core resource templates
+        c.package = c.pkg_dict = dataset
+        c.resource = resource
+
+        template_vars = {u'validation': report, u'resource': resource,
+                         u'pkg_dict': dataset, u'dataset': dataset}
+        return render(u'validation/validation_read.html',
+                      extra_vars=template_vars)
+    except NotAuthorized:
+        return abort(403, _(u'Unauthorized to read this validation report'))
+    except ObjectNotFound:
+        return abort(404, _(u'No validation report exists for this resource'))
\ No newline at end of file
diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py
index b6c856df..aa36572b 100644
--- a/ckanext/validation/helpers.py
+++ b/ckanext/validation/helpers.py
@@ -4,6 +4,18 @@
 from ckan.lib.helpers import url_for_static
 from ckantoolkit import url_for, _, config, asbool, literal, h
 
+def _get_helpers():
+    '''Return this module's template helpers keyed by function name.'''
+    helpers = (
+        get_validation_badge,
+        validation_extract_report_from_errors,
+        dump_json_value,
+        bootstrap_version,
+        validation_dict,
+        use_webassets,
+    )
+    return {func.__name__: func for func in helpers}
+
 
 def get_validation_badge(resource, in_listing=False):
 
diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py
index ddda1ab4..b9ae0f77 100644
--- a/ckanext/validation/plugin.py
+++ b/ckanext/validation/plugin.py
@@ -1,14 +1,16 @@
 # encoding: utf-8
 
+import json
 import logging
 import cgi
-import json
+
 
 from werkzeug.datastructures import FileStorage as FlaskFileStorage
 import ckan.plugins as p
 import ckantoolkit as t
 
-from ckanext.validation import settings
+from . import settings, validators
+from .helpers import _get_helpers
 from ckanext.validation.model import tables_exist
 from .logic import action, auth
 from ckanext.validation.helpers import (
@@ -28,8 +30,7 @@
     get_update_mode_from_config,
 )
 from ckanext.validation.interfaces import IDataValidation
-from ckanext.validation import blueprints, cli
-
+from ckanext.validation import views, cli
 
 ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
 log = logging.getLogger(__name__)
@@ -49,22 +50,23 @@ class ValidationPlugin(p.SingletonPlugin):
     # IBlueprint
 
     def get_blueprint(self):
-        return [blueprints.validation]
+        return views.get_blueprints()
 
     # IClick
 
     def get_commands(self):
-        return [cli.validation]
+        return cli.get_commands()
 
     # IConfigurer
 
     def update_config(self, config_):
         if not tables_exist():
+            init_command = 'ckan validation init-db'
             log.critical(u'''
-The validation extension requires a database setup. Please run the following
-to create the database tables:
-    paster --plugin=ckanext-validation validation init-db
-''')
+The validation extension requires a database setup.
+Validation pages will not be enabled.
+Please run the following to create the database tables:
+    %s''', init_command)
         else:
             log.debug(u'Validation tables exist')
 
@@ -85,14 +87,12 @@ def get_auth_functions(self):
     # ITemplateHelpers
 
     def get_helpers(self):
-        return {
-            u'get_validation_badge': get_validation_badge,
-            u'validation_extract_report_from_errors': validation_extract_report_from_errors,
-            u'dump_json_value': dump_json_value,
-            u'bootstrap_version': bootstrap_version,
-            u'validation_dict': validation_dict,
-            u'use_webassets': use_webassets,
-        }
+        return _get_helpers()
+
+    # IValidators
+
+    def get_validators(self):  # delegates to the validators module
+        return validators.get_validators()
 
     # IResourceController
 
@@ -132,14 +132,27 @@ def _process_schema_fields(self, data_dict):
 
         return data_dict
 
+    # CKAN < 2.10 hook name: forwards to the CKAN >= 2.10 name below
     def before_create(self, context, data_dict):
+        return self.before_resource_create(context, data_dict)
+
+    # CKAN >= 2.10
+    def before_resource_create(self, context, data_dict):
 
         is_dataset = self._data_dict_is_dataset(data_dict)
         if not is_dataset:
             context["_resource_create_call"] = True
             return self._process_schema_fields(data_dict)
 
+    # CKAN < 2.10
     def after_create(self, context, data_dict):
+        # No dataset/resource dispatch is done here:
+        # after_resource_create() calls _data_dict_is_dataset() itself.
+        # NOTE(review): confirm dataset creation is handled on CKAN >= 2.10.
+        return self.after_resource_create(context, data_dict)
+
+    # CKAN >= 2.10
+    def after_resource_create(self, context, data_dict):
 
         is_dataset = self._data_dict_is_dataset(data_dict)
 
@@ -185,7 +198,12 @@ def _handle_validation_for_resource(self, context, resource):
 
             _run_async_validation(resource[u'id'])
 
+    # CKAN < 2.10 hook name: forwards to the CKAN >= 2.10 name below
     def before_update(self, context, current_resource, updated_resource):
+        return self.before_resource_update(context, current_resource, updated_resource)
+
+    # CKAN >= 2.10
+    def before_resource_update(self, context, current_resource, updated_resource):
 
         updated_resource = self._process_schema_fields(updated_resource)
 
@@ -225,7 +243,15 @@ def before_update(self, context, current_resource, updated_resource):
 
         return updated_resource
 
+    # CKAN < 2.10
     def after_update(self, context, data_dict):
+        # No dataset/resource dispatch is done here:
+        # after_resource_update() calls _data_dict_is_dataset() itself.
+        # NOTE(review): confirm dataset updates are handled on CKAN >= 2.10.
+        return self.after_resource_update(context, data_dict)
+
+    # CKAN >= 2.10
+    def after_resource_update(self, context, data_dict):
 
         is_dataset = self._data_dict_is_dataset(data_dict)
 
@@ -287,7 +313,13 @@ def after_update(self, context, data_dict):
 
     # IPackageController
 
+    # CKAN < 2.10 (the hook must always hand the index dict back to CKAN)
     def before_index(self, index_dict):
+        is_dataset = self._data_dict_is_dataset(index_dict)
+        return self.before_dataset_index(index_dict) if is_dataset else index_dict
+
+    # CKAN >= 2.10
+    def before_dataset_index(self, index_dict):
 
         res_status = []
         dataset_dict = json.loads(index_dict['validated_data_dict'])
@@ -300,14 +332,6 @@ def before_index(self, index_dict):
 
         return index_dict
 
-    # IValidators
-
-    def get_validators(self):
-        return {
-            'resource_schema_validator': resource_schema_validator,
-            'validation_options_validator': validation_options_validator,
-        }
-
 
 def _run_async_validation(resource_id):
 
diff --git a/ckanext/validation/validators.py b/ckanext/validation/validators.py
index 9e1962a8..da7c081a 100644
--- a/ckanext/validation/validators.py
+++ b/ckanext/validation/validators.py
@@ -6,6 +6,15 @@
 from ckantoolkit import Invalid, config
 
 
+def get_validators():
+    validators = (
+        resource_schema_validator,
+        validation_options_validator,
+    )
+
+    return {"{}".format(func.__name__): func for func in validators}
+
+
 # Input validators
 
 def resource_schema_validator(value, context):
diff --git a/ckanext/validation/views.py b/ckanext/validation/views.py
index c17f44ee..8e91962a 100644
--- a/ckanext/validation/views.py
+++ b/ckanext/validation/views.py
@@ -2,46 +2,16 @@
 
 from flask import Blueprint
 
-from ckantoolkit import (
-    c, NotAuthorized, ObjectNotFound,
-    abort, _, render, get_action)
+from ckanext.validation import common
 
-validation = Blueprint("service_proxy", __name__)
+validation = Blueprint(u'validation', __name__)
 
 
-def validation_read(self, id, resource_id):
-
-    try:
-        validation = get_action(u'resource_validation_show')(
-            {u'user': c.user},
-            {u'resource_id': resource_id})
-
-        resource = get_action(u'resource_show')(
-            {u'user': c.user},
-            {u'id': resource_id})
-
-        dataset = get_action(u'package_show')(
-            {u'user': c.user},
-            {u'id': resource[u'package_id']})
-
-        # Needed for core resource templates
-        c.package = c.pkg_dict = dataset
-        c.resource = resource
-
-        return render(u'validation/validation_read.html', extra_vars={
-            u'validation': validation,
-            u'resource': resource,
-            u'dataset': dataset,
-        })
-
-    except NotAuthorized:
-        abort(403, _(u'Unauthorized to read this validation report'))
-    except ObjectNotFound:
-
-        abort(404, _(u'No validation report exists for this resource'))
-
 
 validation.add_url_rule(
-    '/dataset/{id}/resource/{resource_id}/validation',
-    view_func=validation_read
+    u'/dataset/<id>/resource/<resource_id>/validation', 'read', methods=('GET',), view_func=common.validation
 )
+
+
+def get_blueprints():
+    return [validation]

From 8755a7801149eb62d280dfd42dcc1c5d55608762 Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 13:24:59 +1000
Subject: [PATCH 3/8] chore: move commands to common and cross reference in
 cli also

---
 ckanext/validation/cli.py      |  81 +++++++++-
 ckanext/validation/commands.py | 267 ++-------------------------------
 ckanext/validation/common.py   | 262 +++++++++++++++++++++++++++++++-
 3 files changed, 344 insertions(+), 266 deletions(-)

diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py
index 3159840a..5a68627d 100644
--- a/ckanext/validation/cli.py
+++ b/ckanext/validation/cli.py
@@ -2,7 +2,7 @@
 
 import click
 
-from ckanext.validation.model import create_tables, tables_exist
+from ckanext.validation import common
 
 
 def get_commands():
@@ -17,10 +17,77 @@ def validation():
 
 @validation.command()
 def init_db():
-    """Creates the necessary tables in the database."""
-    if tables_exist():
-        print(u"Validation tables already exist")
-        sys.exit(0)
+    """ Initialize database tables.
+    """
+    common.init_db()
 
-    create_tables()
-    print(u"Validation tables created")
+
+@validation.command(name='run')
+@click.option(u'-y', u'--yes',
+              help=u'Automatic yes to prompts. Assume "yes" as answer '
+                   u'to all prompts and run non-interactively',
+              is_flag=True, default=False)
+@click.option('-r', '--resource',
+              multiple=True,
+              help=u'Run data validation on a particular resource (if the format is suitable). '
+                   u'It can be defined multiple times. Not to be used with -d or -s')
+@click.option('-d', '--dataset',
+              multiple=True,
+              help=u'Run data validation on all resources for a particular dataset (if the format is suitable).'
+                   u' You can use the dataset id or name, and it can be defined multiple times. '
+                   u'Not to be used with -r or -s')
+@click.option('-s', '--search',
+              default=None,
+              help=u'Extra search parameters that will be used for getting the datasets to run '
+                   u'validation on. It must be a JSON object like the one used by the `package_search` API call.'
+                   u' Supported fields are `q`, `fq` and `fq_list`. Check the documentation for examples. '
+                   u'Note that when using this you will have to specify the resource formats to target yourself.'
+                   u' Not to be used with -r or -d.')
+def run_validation(yes, resource, dataset, search):
+    '''Start asynchronous data validation on the site resources. If no
+    options are provided it will run validation on all resources of
+    the supported formats (`ckanext.validation.formats`). You can
+    specify particular datasets to run the validation on their
+    resources. You can also pass arbitrary search parameters to filter
+    the selected datasets.
+    '''
+    common.run_validation(yes, resource, dataset, search)
+
+
+@validation.command()
+@click.option(u'-o', u'--output',
+              help=u'Location of the CSV validation report file on the relevant commands.',
+              default=u'validation_errors_report.csv')
+def report(output):
+    '''Generate a report with all current data validation reports. This
+    will print an overview of the total number of tabular resources
+    and a breakdown of how many have a validation status of success,
+    failure or error. Additionally it will create a CSV report with all
+    failing resources, including the following fields:
+        * Dataset name
+        * Resource id
+        * Resource URL
+        * Status
+        * Validation report URL
+    '''
+    common.report(output)
+
+
+@validation.command(name='report-full')
+@click.option(u'-o', u'--output',
+              help=u'Location of the CSV validation report file on the relevant commands.',
+              default=u'validation_errors_report.csv')
+def report_full(output):
+    '''Generate a detailed report. This is similar to 'report'
+    but on the CSV report it will add a row for each error found on the
+    validation report (limited to ten occurrences of the same error
+    type per file). So the fields in the generated CSV report will be:
+
+        * Dataset name
+        * Resource id
+        * Resource URL
+        * Status
+        * Error code
+        * Error message
+    '''
+    common.report(output, full=True)
diff --git a/ckanext/validation/commands.py b/ckanext/validation/commands.py
index 4cb1ba69..04505bb9 100644
--- a/ckanext/validation/commands.py
+++ b/ckanext/validation/commands.py
@@ -1,25 +1,10 @@
 # encoding: utf-8
 
 import sys
-import logging
-import csv
 
-from ckan.lib.cli import query_yes_no
-from ckantoolkit import CkanCommand, get_action, config
+from ckantoolkit import CkanCommand
 
-from ckanext.validation import settings
-from ckanext.validation.model import create_tables, tables_exist
-from ckanext.validation.logic import _search_datasets
-
-
-def error(msg):
-    '''
-    Print an error message to STDOUT and exit with return code 1.
-    '''
-    sys.stderr.write(msg)
-    if not msg.endswith('\n'):
-        sys.stderr.write('\n')
-    sys.exit(1)
+from ckanext.validation import common
 
 
 class Validation(CkanCommand):
@@ -111,9 +96,6 @@ def __init__(self, name):
                                help='''Location of the CSV validation
 report file on the relevant commands.''')
 
-
-    _page_size = 100
-
     def command(self):
         self._load_config()
 
@@ -137,249 +119,18 @@ def command(self):
             sys.exit(1)
 
     def init_db(self):
-
-        if tables_exist():
-            print(u'Validation tables already exist')
-            sys.exit(0)
-
-        create_tables()
-
-        print(u'Validation tables created')
+        common.init_db()
 
     def run_validation(self):
 
-        if self.options.resource_id:
-            for resource_id in self.options.resource_id:
-                resource = get_action('resource_show')({}, {'id': resource_id})
-                self._run_validation_on_resource(
-                    resource['id'], resource['package_id'])
-        else:
-
-            query = _search_datasets()
-
-            if query['count'] == 0:
-                error('No suitable datasets, exiting...')
-
-            elif not self.options.assume_yes:
-
-                msg = ('\nYou are about to start validation for {0} datasets' +
-                       '.\n Do you want to continue?')
-
-                confirm = query_yes_no(msg.format(query['count']))
-
-                if confirm == 'no':
-                    error('Command aborted by user')
-
-            result = get_action('resource_validation_run_batch')(
-                {'ignore_auth': True},
-                {'dataset_ids': self.options.dataset_id,
-                 'query': self.options.search_params}
-            )
-            print(result['output'])
-
-    def _run_validation_on_resource(self, resource_id, dataset_id):
-
-        log = logging.getLogger(__name__)
-
-        get_action(u'resource_validation_run')(
-            {u'ignore_auth': True},
-            {u'resource_id': resource_id,
-             u'async': True})
-
-        msg = ('Resource {} from dataset {} sent to ' +
-               'the validation queue')
-
-        log.debug(
-            msg.format(resource_id, dataset_id))
-
-    def _process_row(self, dataset, resource, writer):
-        resource_url = '{}/dataset/{}/resource/{}'.format(
-            config['ckan.site_url'],
-            dataset['name'],
-            resource['id'])
-
-        validation_url = resource_url + '/validation'
-
-        writer.writerow({
-            'dataset': dataset['name'],
-            'resource_id': resource['id'],
-            'format': resource['format'],
-            'url': resource_url,
-            'status': resource['validation_status'],
-            'validation_report_url': validation_url
-        })
-
-        return
-
-    def _process_row_full(self, dataset, resource, writer):
-
-        limit_per_error_type = 10
-
-        error_counts = {}
-
-        resource_url = '{}/dataset/{}/resource/{}'.format(
-            config['ckan.site_url'],
-            dataset['name'],
-            resource['id'])
-
-        # Get validation report
-        validation = get_action('resource_validation_show')(
-            {'ignore_auth': True}, {'resource_id': resource['id']})
-
-        if not validation.get('report'):
-            return
+        assume_yes = self.options.assume_yes
+        resource_ids = self.options.resource_id
+        dataset_ids = self.options.dataset_id
+        query = self.options.search_params
 
-        errors = validation['report']['tables'][0]['errors']
-
-        for error in errors:
-            if not error['code'] in error_counts:
-                error_counts[error['code']] = 1
-            else:
-                error_counts[error['code']] += 1
-
-            if error_counts[error['code']] > limit_per_error_type:
-                continue
-
-            writer.writerow({
-                'dataset': dataset['name'],
-                'resource_id': resource['id'],
-                'format': resource['format'],
-                'url': resource_url,
-                'status': resource['validation_status'],
-                'error_code': error['code'],
-                'error_message': error['message']
-            })
-
-        return error_counts
+        common.run_validation(assume_yes, resource_ids, dataset_ids, query)
 
     def report(self, full=False):
 
-        log = logging.getLogger(__name__)
-
         output_csv = self.options.output_file
-        if output_csv == 'validation_errors_report.csv' and full:
-            output_csv = 'validation_errors_report_full.csv'
-
-        outputs = {
-            'tabular_resources': 0,
-            'resources_failure': 0,
-            'resources_error': 0,
-            'resources_success': 0,
-            'datasets': 0,
-            'formats_success': {},
-            'formats_failure': {}
-        }
-        error_counts = {}
-
-        with open(output_csv, 'w') as fw:
-            if full:
-                fieldnames = [
-                    'dataset', 'resource_id', 'format', 'url',
-                    'status', 'error_code', 'error_message']
-            else:
-                fieldnames = [
-                    'dataset', 'resource_id', 'format', 'url',
-                    'status', 'validation_report_url']
-
-            writer = csv.DictWriter(fw, fieldnames=fieldnames)
-            writer.writeheader()
-
-            page = 1
-            while True:
-                query = _search_datasets(page)
-
-                if page == 1 and query['count'] == 0:
-                    error('No suitable datasets, exiting...')
-
-                if query['results']:
-                    for dataset in query['results']:
-
-                        if not dataset.get('resources'):
-                            continue
-
-                        for resource in dataset['resources']:
-
-                            if (not resource['format'].lower() in
-                                    settings.DEFAULT_SUPPORTED_FORMATS):
-                                continue
-
-                            outputs['tabular_resources'] += 1
-
-                            if resource.get('validation_status'):
-                                outputs['resources_' + resource['validation_status']] += 1
-
-                            if resource.get('validation_status') in (
-                                        'failure', 'error'):
-                                if full:
-                                    row_counts = self._process_row_full(dataset, resource, writer)
-                                    if not row_counts:
-                                        continue
-                                    for code, count in row_counts.iteritems():
-                                        if code not in error_counts:
-                                            error_counts[code] = count
-                                        else:
-                                            error_counts[code] += count
-                                else:
-                                    self._process_row(dataset, resource, writer)
-
-                                if resource['format'] in outputs['formats_failure']:
-                                    outputs['formats_failure'][resource['format']] += 1
-                                else:
-                                    outputs['formats_failure'][resource['format']] = 1
-                            else:
-                                if resource['format'] in outputs['formats_success']:
-                                    outputs['formats_success'][resource['format']] += 1
-                                else:
-                                    outputs['formats_success'][resource['format']] = 1
-
-
-                    if len(query['results']) < self._page_size:
-                        break
-
-                    page += 1
-                else:
-                    break
-
-        outputs['datasets'] = query['count']
-        outputs['output_csv'] = output_csv
-
-        outputs['formats_success_output'] = ''
-        for count, code in sorted([(v, k) for k, v in outputs['formats_success'].iteritems()], reverse=True):
-            outputs['formats_success_output'] += '* {}: {}\n'.format(code, count)
-
-        outputs['formats_failure_output'] = ''
-        for count, code in sorted([(v, k) for k, v in outputs['formats_failure'].iteritems()], reverse=True):
-            outputs['formats_failure_output'] += '* {}: {}\n'.format(code, count)
-
-        error_counts_output = ''
-        if full:
-            for count, code in sorted([(v, k) for k, v in error_counts.iteritems()], reverse=True):
-                error_counts_output += '* {}: {}\n'.format(code, count)
-
-        outputs['error_counts_output'] = error_counts_output
-
-        msg_errors = '''
-Errors breakdown:
-{}
-'''.format(outputs['error_counts_output'])
-
-        outputs['msg_errors'] = msg_errors if full else ''
-
-        msg = '''
-Done.
-{datasets} datasets with tabular resources
-{tabular_resources} tabular resources
-{resources_success} resources - validation success
-{resources_failure} resources - validation failure
-{resources_error} resources - validation error
-
-Formats breakdown (validation passed):
-{formats_success_output}
-Formats breakdown (validation failed or errored):
-{formats_failure_output}
-{msg_errors}
-CSV Report stored in {output_csv}
-'''.format(**outputs)
-
-
-        log.info(msg)
+        common.report(output_csv, full)
diff --git a/ckanext/validation/common.py b/ckanext/validation/common.py
index cd4a284e..fd01205c 100644
--- a/ckanext/validation/common.py
+++ b/ckanext/validation/common.py
@@ -53,4 +53,264 @@ def validation(resource_id, id=None):
     except NotAuthorized:
         return abort(403, _(u'Unauthorized to read this validation report'))
     except ObjectNotFound:
-        return abort(404, _(u'No validation report exists for this resource'))
\ No newline at end of file
+        return abort(404, _(u'No validation report exists for this resource'))
+
+
+###############################################################################
+#                                     CLI                                     #
+###############################################################################
+
+
+def user_confirm(msg):
+    import click
+    return click.confirm(msg)
+
+
+def error(msg):
+    '''
+    Print an error message to STDERR and exit with return code 1.
+    '''
+    sys.stderr.write(msg)
+    if not msg.endswith('\n'):
+        sys.stderr.write('\n')
+    sys.exit(1)
+
+
+def init_db():
+    if tables_exist():
+        print(u'Validation tables already exist')
+        sys.exit(0)
+    create_tables()
+    print(u'Validation tables created')
+
+
+def run_validation(assume_yes, resource_ids, dataset_ids, search_params):
+
+    if resource_ids:
+        for resource_id in resource_ids:
+            resource = get_action('resource_show')({}, {'id': resource_id})
+            _run_validation_on_resource(
+                resource['id'], resource['package_id'])
+    else:
+
+        query = _search_datasets()
+
+        if query['count'] == 0:
+            error('No suitable datasets, exiting...')
+
+        elif not assume_yes:
+            msg = ('\nYou are about to start validation for {0} datasets'
+                   '.\n Do you want to continue?')
+
+            if not user_confirm(msg.format(query['count'])):
+                error('Command aborted by user')
+
+        result = get_action('resource_validation_run_batch')(
+            {'ignore_auth': True},
+            {'dataset_ids': dataset_ids,
+             'query': search_params}
+        )
+        print(result['output'])
+
+
+def _run_validation_on_resource(resource_id, dataset_id):
+
+    get_action(u'resource_validation_run')(
+        {u'ignore_auth': True},
+        {u'resource_id': resource_id,
+         u'async': True})
+
+    log.debug('Resource %s from dataset %s sent to the validation queue',
+              resource_id, dataset_id)
+
+
+def _process_row(dataset, resource, writer):
+    resource_url = '{}/dataset/{}/resource/{}'.format(
+        config['ckan.site_url'],
+        dataset['name'],
+        resource['id'])
+
+    validation_url = resource_url + '/validation'
+
+    writer.writerow({
+        'dataset': dataset['name'],
+        'resource_id': resource['id'],
+        'format': resource['format'],
+        'url': resource_url,
+        'status': resource['validation_status'],
+        'validation_report_url': validation_url
+    })
+
+    return
+
+
+def _process_row_full(dataset, resource, writer):
+
+    limit_per_error_type = 10
+
+    error_counts = {}
+
+    resource_url = '{}/dataset/{}/resource/{}'.format(
+        config['ckan.site_url'],
+        dataset['name'],
+        resource['id'])
+
+    # Get validation report
+    validation = get_action('resource_validation_show')(
+        {'ignore_auth': True}, {'resource_id': resource['id']})
+
+    if not validation.get('report'):
+        return
+
+    errors = validation['report']['tables'][0]['errors']
+
+    for error in errors:
+        if not error['code'] in error_counts:
+            error_counts[error['code']] = 1
+        else:
+            error_counts[error['code']] += 1
+
+        if error_counts[error['code']] > limit_per_error_type:
+            continue
+
+        writer.writerow({
+            'dataset': dataset['name'],
+            'resource_id': resource['id'],
+            'format': resource['format'],
+            'url': resource_url,
+            'status': resource['validation_status'],
+            'error_code': error['code'],
+            'error_message': error['message']
+        })
+
+    return error_counts
+
+
+def report(output_csv, full=False):
+
+    _page_size = 100
+
+    if output_csv == 'validation_errors_report.csv' and full:
+        output_csv = 'validation_errors_report_full.csv'
+
+    outputs = {
+        'tabular_resources': 0,
+        'resources_failure': 0,
+        'resources_error': 0,
+        'resources_success': 0,
+        'datasets': 0,
+        'formats_success': {},
+        'formats_failure': {}
+    }
+    error_counts = {}
+
+    with open(output_csv, 'w') as fw:
+        if full:
+            fieldnames = [
+                'dataset', 'resource_id', 'format', 'url',
+                'status', 'error_code', 'error_message']
+        else:
+            fieldnames = [
+                'dataset', 'resource_id', 'format', 'url',
+                'status', 'validation_report_url']
+
+        writer = csv.DictWriter(fw, fieldnames=fieldnames)
+        writer.writeheader()
+
+        page = 1
+        while True:
+            query = _search_datasets(page)
+
+            if page == 1 and query['count'] == 0:
+                error('No suitable datasets, exiting...')
+
+            if query['results']:
+                for dataset in query['results']:
+
+                    if not dataset.get('resources'):
+                        continue
+
+                    for resource in dataset['resources']:
+
+                        if (not resource['format'].lower() in
+                                settings.DEFAULT_SUPPORTED_FORMATS):
+                            continue
+
+                        outputs['tabular_resources'] += 1
+
+                        if resource.get('validation_status'):
+                            outputs['resources_' + resource['validation_status']] += 1
+
+                        if resource.get('validation_status') in (
+                                'failure', 'error'):
+                            if full:
+                                row_counts = _process_row_full(dataset, resource, writer)
+                                if not row_counts:
+                                    continue
+                                for code, count in six.iteritems(row_counts):
+                                    if code not in error_counts:
+                                        error_counts[code] = count
+                                    else:
+                                        error_counts[code] += count
+                            else:
+                                _process_row(dataset, resource, writer)
+
+                            if resource['format'] in outputs['formats_failure']:
+                                outputs['formats_failure'][resource['format']] += 1
+                            else:
+                                outputs['formats_failure'][resource['format']] = 1
+                        else:
+                            if resource['format'] in outputs['formats_success']:
+                                outputs['formats_success'][resource['format']] += 1
+                            else:
+                                outputs['formats_success'][resource['format']] = 1
+
+                if len(query['results']) < _page_size:
+                    break
+
+                page += 1
+            else:
+                break
+
+    outputs['datasets'] = query['count']
+    outputs['output_csv'] = output_csv
+
+    outputs['formats_success_output'] = ''
+    for count, code in sorted([(v, k) for k, v in six.iteritems(outputs['formats_success'])], reverse=True):
+        outputs['formats_success_output'] += '* {}: {}\n'.format(code, count)
+
+    outputs['formats_failure_output'] = ''
+    for count, code in sorted([(v, k) for k, v in six.iteritems(outputs['formats_failure'])], reverse=True):
+        outputs['formats_failure_output'] += '* {}: {}\n'.format(code, count)
+
+    error_counts_output = ''
+    if full:
+        for count, code in sorted([(v, k) for k, v in six.iteritems(error_counts)], reverse=True):
+            error_counts_output += '* {}: {}\n'.format(code, count)
+
+    outputs['error_counts_output'] = error_counts_output
+
+    msg_errors = '''
+        Errors breakdown:
+        {}
+        '''.format(outputs['error_counts_output'])
+
+    outputs['msg_errors'] = msg_errors if full else ''
+
+    msg = '''
+        Done.
+        {datasets} datasets with tabular resources
+        {tabular_resources} tabular resources
+        {resources_success} resources - validation success
+        {resources_failure} resources - validation failure
+        {resources_error} resources - validation error
+
+        Formats breakdown (validation passed):
+        {formats_success_output}
+        Formats breakdown (validation failed or errored):
+        {formats_failure_output}
+        {msg_errors}
+        CSV Report stored in {output_csv}
+        '''.format(**outputs)
+
+    log.info(msg)

From 22692ed4065a12a1f43c4a11a379680e436b0f28 Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 13:31:05 +1000
Subject: [PATCH 4/8] chore: controller uses common function

---
 ckanext/validation/cli.py        |  7 ++++---
 ckanext/validation/controller.py | 36 ++++----------------------------
 2 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/ckanext/validation/cli.py b/ckanext/validation/cli.py
index 5a68627d..290dcbf9 100644
--- a/ckanext/validation/cli.py
+++ b/ckanext/validation/cli.py
@@ -1,4 +1,4 @@
-import sys
+# encoding: utf-8
 
 import click
 
@@ -11,11 +11,12 @@ def get_commands():
 
 @click.group()
 def validation():
-    """Harvests remotely mastered metadata."""
+    """Validation management commands.
+    """
     pass
 
 
-@validation.command()
+@validation.command(name='init-db')
 def init_db():
     """ Initialize database tables.
     """
diff --git a/ckanext/validation/controller.py b/ckanext/validation/controller.py
index 91224649..b4396a21 100644
--- a/ckanext/validation/controller.py
+++ b/ckanext/validation/controller.py
@@ -1,39 +1,11 @@
 # encoding: utf-8
 
-from ckantoolkit import (
-    BaseController, c, NotAuthorized, ObjectNotFound,
-    abort, _, render, get_action)
+from ckantoolkit import BaseController
+
+from ckanext.validation import common
 
 
 class ValidationController(BaseController):
 
     def validation(self, resource_id):
-
-        try:
-            validation = get_action(u'resource_validation_show')(
-                {u'user': c.user},
-                {u'resource_id': resource_id})
-
-            resource = get_action(u'resource_show')(
-                {u'user': c.user},
-                {u'id': resource_id})
-
-            dataset = get_action(u'package_show')(
-                {u'user': c.user},
-                {u'id': resource[u'package_id']})
-
-            # Needed for core resource templates
-            c.package = c.pkg_dict = dataset
-            c.resource = resource
-
-            return render(u'validation/validation_read.html', extra_vars={
-                u'validation': validation,
-                u'resource': resource,
-                u'dataset': dataset,
-            })
-
-        except NotAuthorized:
-            abort(403, _(u'Unauthorized to read this validation report'))
-        except ObjectNotFound:
-
-            abort(404, _(u'No validation report exists for this resource'))
+        return common.validation(resource_id)

From f001c86f493615c645d8b781c12bdadfcb2717cb Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 13:32:16 +1000
Subject: [PATCH 5/8] chore: update model

---
 ckanext/validation/model.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/ckanext/validation/model.py b/ckanext/validation/model.py
index 52841c06..9e82b7f7 100644
--- a/ckanext/validation/model.py
+++ b/ckanext/validation/model.py
@@ -7,6 +7,7 @@
 from sqlalchemy import Column, Unicode, DateTime
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.dialects.postgresql import JSON
+from six import text_type
 
 from ckan.model.meta import metadata
 
@@ -14,7 +15,7 @@
 
 
 def make_uuid():
-    return str(uuid.uuid4())
+    return text_type(uuid.uuid4())
 
 
 Base = declarative_base(metadata=metadata)
@@ -25,10 +26,20 @@ class Validation(Base):
 
     id = Column(Unicode, primary_key=True, default=make_uuid)
     resource_id = Column(Unicode)
+    #  status can be one of these values:
+    #     created: Job created and put onto queue
+    #     running: Job picked up by worker and being processed
+    #     success: Validation Successful and report attached
+    #     failure: Validation Failed and report attached
+    #     error: Validation Job could not create validation report
     status = Column(Unicode, default=u'created')
+    # created is when job was added
     created = Column(DateTime, default=datetime.datetime.utcnow)
+    # finished is when report was generated, is None when new or restarted
     finished = Column(DateTime)
+    # json object of report, can be None
     report = Column(JSON)
+    # json object of error, can be None
     error = Column(JSON)
 
 

From 080855574430ca1bccef48e6e11b124ef43f5208 Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 13:48:56 +1000
Subject: [PATCH 6/8] chore: move functions out of plugin into utils

---
 ckanext/validation/plugin.py | 116 +++--------------------------------
 ckanext/validation/utils.py  | 106 +++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 107 deletions(-)

diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py
index b9ae0f77..a47648ce 100644
--- a/ckanext/validation/plugin.py
+++ b/ckanext/validation/plugin.py
@@ -2,29 +2,17 @@
 
 import json
 import logging
-import cgi
 
 
-from werkzeug.datastructures import FileStorage as FlaskFileStorage
+
 import ckan.plugins as p
 import ckantoolkit as t
 
-from . import settings, validators
+from . import settings, utils, validators
 from .helpers import _get_helpers
 from ckanext.validation.model import tables_exist
 from .logic import action, auth
-from ckanext.validation.helpers import (
-    get_validation_badge,
-    validation_extract_report_from_errors,
-    dump_json_value,
-    bootstrap_version,
-    validation_dict,
-    use_webassets,
-)
-from ckanext.validation.validators import (
-    resource_schema_validator,
-    validation_options_validator,
-)
+
 from ckanext.validation.utils import (
     get_create_mode_from_config,
     get_update_mode_from_config,
@@ -32,7 +20,7 @@
 from ckanext.validation.interfaces import IDataValidation
 from ckanext.validation import views, cli
 
-ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
+
 log = logging.getLogger(__name__)
 
 
@@ -99,38 +87,6 @@ def get_validators(self):
     resources_to_validate = {}
     packages_to_skip = {}
 
-    def _process_schema_fields(self, data_dict):
-        u'''
-        Normalize the different ways of providing the `schema` field
-
-        1. If `schema_upload` is provided and it's a valid file, the contents
-           are read into `schema`.
-        2. If `schema_url` is provided and looks like a valid URL, it's copied
-           to `schema`
-        3. If `schema_json` is provided, it's copied to `schema`.
-
-        All the 3 `schema_*` fields are removed from the data_dict.
-        Note that the data_dict still needs to pass validation
-        '''
-
-        schema_upload = data_dict.pop(u'schema_upload', None)
-        schema_url = data_dict.pop(u'schema_url', None)
-        schema_json = data_dict.pop(u'schema_json', None)
-        if isinstance(schema_upload, ALLOWED_UPLOAD_TYPES):
-            uploaded_file = _get_underlying_file(schema_upload)
-            data_dict[u'schema'] = uploaded_file.read()
-            if isinstance(data_dict["schema"], (bytes, bytearray)):
-                data_dict["schema"] = data_dict["schema"].decode()
-        elif schema_url:
-
-            if (not isinstance(schema_url, str) or
-                    not schema_url.lower()[:4] == u'http'):
-                raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
-            data_dict[u'schema'] = schema_url
-        elif schema_json:
-            data_dict[u'schema'] = schema_json
-
-        return data_dict
 
     # CKAN < 2.10
     def before_create(self, context, data_dict):
@@ -142,7 +98,7 @@ def before_resource_create(self, context, data_dict):
         is_dataset = self._data_dict_is_dataset(data_dict)
         if not is_dataset:
             context["_resource_create_call"] = True
-            return self._process_schema_fields(data_dict)
+            return utils.process_schema_fields(data_dict)
 
     # CKAN < 2.10
     def after_create(self, context, data_dict):
@@ -196,7 +152,7 @@ def _handle_validation_for_resource(self, context, resource):
                     log.debug('Skipping validation for resource %s', resource['id'])
                     return
 
-            _run_async_validation(resource[u'id'])
+            utils._run_async_validation(resource[u'id'])
 
     # CKAN < 2.10
     def before_update(self, context, current_resource, updated_resource):
@@ -205,7 +161,7 @@ def before_update(self, context, current_resource, updated_resource):
     # CKAN >= 2.10
     def before_resource_update(self, context, current_resource, updated_resource):
 
-        updated_resource = self._process_schema_fields(updated_resource)
+        updated_resource = utils.process_schema_fields(updated_resource)
 
         # the call originates from a resource API, so don't validate the entire package
         package_id = updated_resource.get('package_id')
@@ -306,10 +262,10 @@ def after_resource_update(self, context, data_dict):
 
                 del self.resources_to_validate[resource_id]
 
-                _run_async_validation(resource_id)
+                utils._run_async_validation(resource_id)
 
-            if _should_remove_unsupported_resource_validation_reports(data_dict):
-                p.toolkit.enqueue_job(fn=_remove_unsupported_resource_validation_reports, args=[resource_id])
+            if utils._should_remove_unsupported_resource_validation_reports(data_dict):
+                p.toolkit.enqueue_job(fn=utils._remove_unsupported_resource_validation_reports, args=[resource_id])
 
     # IPackageController
 
@@ -332,55 +288,3 @@ def before_dataset_index(self, index_dict):
 
         return index_dict
 
-
-def _run_async_validation(resource_id):
-
-    try:
-        t.get_action(u'resource_validation_run')(
-            {u'ignore_auth': True},
-            {u'resource_id': resource_id,
-             u'async': True})
-    except t.ValidationError as e:
-        log.warning(
-            u'Could not run validation for resource %s: %s',
-                resource_id, e)
-
-def _get_underlying_file(wrapper):
-    if isinstance(wrapper, FlaskFileStorage):
-        return wrapper.stream
-    return wrapper.file
-
-
-def _should_remove_unsupported_resource_validation_reports(res_dict):
-    if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
-        return False
-    return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
-            and (res_dict.get('url_type') == 'upload'
-                or not res_dict.get('url_type'))
-            and (t.h.asbool(res_dict.get('validation_status', False))
-                or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))
-
-
-def _remove_unsupported_resource_validation_reports(resource_id):
-    """
-    Callback to remove unsupported validation reports.
-    Controlled by config value: ckanext.validation.clean_validation_reports.
-    Double check the resource format. Only supported Validation formats should have validation reports.
-    If the resource format is not supported, we should delete the validation reports.
-    """
-    context = {"ignore_auth": True}
-    try:
-        res = p.toolkit.get_action('resource_show')(context, {"id": resource_id})
-    except t.ObjectNotFound:
-        log.error('Resource %s does not exist.', resource_id)
-        return
-
-    if _should_remove_unsupported_resource_validation_reports(res):
-        log.info('Unsupported resource format "%s". Deleting validation reports for resource %s',
-            res.get(u'format', u''), res['id'])
-        try:
-            p.toolkit.get_action('resource_validation_delete')(context, {
-                "resource_id": res['id']})
-            log.info('Validation reports deleted for resource %s', res['id'])
-        except t.ObjectNotFound:
-            log.error('Validation reports for resource %s do not exist', res['id'])
diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py
index 6c16d77f..f5cdb9fe 100644
--- a/ckanext/validation/utils.py
+++ b/ckanext/validation/utils.py
@@ -1,5 +1,6 @@
 import os
 import logging
+import cgi
 
 from ckan.plugins import PluginImplementations
 from ckan.lib.uploader import ResourceUpload
@@ -7,9 +8,112 @@
 
 from ckanext.validation.interfaces import IPipeValidation
 
-
 log = logging.getLogger(__name__)
 
+from . import settings
+import ckan.plugins as p
+import ckantoolkit as t
+
+from werkzeug.datastructures import FileStorage as FlaskFileStorage
+ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
+
+def process_schema_fields(data_dict):
+    u'''
+    Collapse the `schema_upload`, `schema_url` and `schema_json` inputs into
+    a single `schema` key. The first of these that applies wins:
+
+    1. `schema_upload` is one of ALLOWED_UPLOAD_TYPES: its contents are read
+       into `schema` (bytes are decoded to text).
+    2. `schema_url` is set: it must be a string starting with "http"
+       (case-insensitive), otherwise a ValidationError is raised.
+    3. `schema_json` is set: it is copied to `schema` unchanged.
+
+    All three `schema_*` keys are removed from `data_dict`, which is modified
+    in place and returned. Note that it still needs to pass validation.
+    '''
+
+    upload = data_dict.pop(u'schema_upload', None)
+    url = data_dict.pop(u'schema_url', None)
+    raw_json = data_dict.pop(u'schema_json', None)
+
+    if isinstance(upload, ALLOWED_UPLOAD_TYPES):
+        data_dict[u'schema'] = _get_underlying_file(upload).read()
+        if isinstance(data_dict[u'schema'], (bytes, bytearray)):
+            data_dict[u'schema'] = data_dict[u'schema'].decode()
+    elif url:
+        # Only a cheap prefix check happens here
+        looks_like_url = isinstance(url, str) and url.lower().startswith(u'http')
+        if not looks_like_url:
+            raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
+        data_dict[u'schema'] = url
+    elif raw_json:
+        data_dict[u'schema'] = raw_json
+
+    return data_dict
+
+def _get_underlying_file(wrapper):
+    # werkzeug uploads expose the file object as `.stream`, anything else as `.file`
+    attr_name = 'stream' if isinstance(wrapper, FlaskFileStorage) else 'file'
+    return getattr(wrapper, attr_name)
+
+
+def _should_remove_unsupported_resource_validation_reports(res_dict):
+    """True when report cleanup is enabled and `res_dict` is an uploaded (or url_type-less)
+    resource in an unsupported format whose validation_status is true per asbool."""
+    return (t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False))
+            and (res_dict.get('format') or u'').lower() not in settings.SUPPORTED_FORMATS  # None-safe
+            and (res_dict.get('url_type') == 'upload' or not res_dict.get('url_type'))
+            and (t.h.asbool(res_dict.get('validation_status', False))
+                 or t.h.asbool((res_dict.get('extras') or {}).get('validation_status', False))))
+
+
+def _run_async_validation(resource_id):
+    """Call `resource_validation_run` with `async=True`; a ValidationError is only logged."""
+    try:
+        t.get_action(u'resource_validation_run')(
+            {u'ignore_auth': True},
+            {u'resource_id': resource_id,
+             u'async': True})
+    except t.ValidationError as e:
+        log.warning(
+            u'Could not run validation for resource %s: %s',
+                resource_id, e)
+
+
+def _should_remove_unsupported_resource_validation_reports(res_dict):
+    if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
+        return False
+    return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
+            and (res_dict.get('url_type') == 'upload'
+                or not res_dict.get('url_type'))
+            and (t.h.asbool(res_dict.get('validation_status', False))
+                or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))
+
+
+def _remove_unsupported_resource_validation_reports(resource_id):
+    """
+    Callback to remove unsupported validation reports.
+    Controlled by config value: ckanext.validation.clean_validation_reports.
+    Re-fetches the resource via `resource_show` and re-checks it before calling
+    `resource_validation_delete`; ObjectNotFound is logged as an error, not raised.
+    """
+    context = {"ignore_auth": True}
+    try:
+        res = p.toolkit.get_action('resource_show')(context, {"id": resource_id})
+    except t.ObjectNotFound:
+        log.error('Resource %s does not exist.', resource_id)
+        return
+    # Only delete when the freshly fetched resource still qualifies for cleanup.
+    if _should_remove_unsupported_resource_validation_reports(res):
+        log.info('Unsupported resource format "%s". Deleting validation reports for resource %s',
+            res.get(u'format', u''), res['id'])
+        try:
+            p.toolkit.get_action('resource_validation_delete')(context, {
+                "resource_id": res['id']})
+            log.info('Validation reports deleted for resource %s', res['id'])
+        except t.ObjectNotFound:
+            log.error('Validation reports for resource %s do not exist', res['id'])
+
 
 def get_update_mode_from_config():
     if asbool(

From b9c04fd0726857501649e9ab1d2594aa9c139a5e Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Wed, 11 Dec 2024 14:52:32 +1000
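Subject: [PATCH 7/8] chore: cleanup

Besides import and formatting tidy-ups, run_validation_job now also accepts
a resource id in place of a resource dict, a schema fetched from a URL is no
longer fed to json.loads after r.json(), and the duplicated
_should_remove_unsupported_resource_validation_reports helper is removed
from utils.
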
---
 ckanext/validation/jobs.py   | 87 +++++++++++++++++-------------------
 ckanext/validation/plugin.py | 19 ++++----
 ckanext/validation/utils.py  | 10 -----
 3 files changed, 51 insertions(+), 65 deletions(-)

diff --git a/ckanext/validation/jobs.py b/ckanext/validation/jobs.py
index f0d658ec..74726eb6 100644
--- a/ckanext/validation/jobs.py
+++ b/ckanext/validation/jobs.py
@@ -8,18 +8,15 @@
 import requests
 from sqlalchemy.orm.exc import NoResultFound
 from frictionless import validate, system, Report, Schema, Dialect, Check
+from six import string_types
 
 from ckan.model import Session
 import ckan.lib.uploader as uploader
 
 import ckantoolkit as t
 
-from ckanext.validation.model import Validation
-from ckanext.validation.utils import (
-    get_update_mode_from_config,
-    send_validation_report,
-    validation_dictize,
-)
+from .model import Validation
+from . import utils
 
 
 log = logging.getLogger(__name__)
@@ -27,7 +24,17 @@
 
 def run_validation_job(resource):
 
-    log.debug('Validating resource %s', resource['id'])
+    # handle either a resource dict or just an ID
+    # ID is more efficient, as resource dicts can be very large
+    if isinstance(resource, string_types):
+        log.debug(u'run_validation_job: calling resource_show: %s', resource)
+        resource = t.get_action('resource_show')({'ignore_auth': True}, {'id': resource})
+
+    resource_id = resource.get('id')
+    if resource_id:
+        log.debug(u'Validating resource: %s', resource_id)
+    else:
+        log.debug(u'Validating resource dict: %s', resource)
 
     try:
         validation = Session.query(Validation).filter(
@@ -59,37 +66,38 @@ def run_validation_job(resource):
         {'ignore_auth': True}, {'id': resource['package_id']})
 
     source = None
-    if resource.get('url_type') == 'upload':
+    if resource.get(u'url_type') == u'upload':
         upload = uploader.get_resource_uploader(resource)
         if isinstance(upload, uploader.ResourceUpload):
-            source = upload.get_path(resource['id'])
+            source = upload.get_path(resource[u'id'])
         else:
             # Upload is not the default implementation (ie it's a cloud storage
             # implementation)
             pass_auth_header = t.asbool(
-                t.config.get('ckanext.validation.pass_auth_header', True))
-            if dataset['private'] and pass_auth_header:
+                t.config.get(u'ckanext.validation.pass_auth_header', True))
+            if dataset[u'private'] and pass_auth_header:
                 s = requests.Session()
                 s.headers.update({
-                    'Authorization': t.config.get(
-                        'ckanext.validation.pass_auth_header_value',
-                        _get_site_user_api_key())
+                    u'Authorization': t.config.get(
+                        u'ckanext.validation.pass_auth_header_value',
+                        utils.get_site_user_api_key())
                 })
 
-                options['http_session'] = s
+                options[u'http_session'] = s
 
     if not source:
-        source = resource['url']
-
-    schema = resource.get('schema')
-    if schema:
-        if isinstance(schema, str):
-            if schema.startswith('http'):
-                r = requests.get(schema)
-                schema = r.json()
+        source = resource[u'url']
+
+    schema = resource.get(u'schema')
+    if schema and isinstance(schema, string_types):
+        if schema.startswith('http'):
+            r = requests.get(schema)
+            schema = r.json()
+        else:
             schema = json.loads(schema)
 
-    _format = resource['format'].lower()
+    _format = resource[u'format'].lower()
+
     report = _validate_table(source, _format=_format, schema=schema, **options)
 
     # Hide uploaded files
@@ -127,30 +135,27 @@ def run_validation_job(resource):
         'validation_timestamp': validation.finished.isoformat(),
     }
 
-    if get_update_mode_from_config() == 'sync':
+    if utils.get_update_mode_from_config() == 'sync':
         data_dict['_skip_next_validation'] = True,
 
-    patch_context = {
-        'ignore_auth': True,
+    t.get_action('resource_patch')(
+        {'ignore_auth': True,
         'user': t.get_action('get_site_user')({'ignore_auth': True})['name'],
-        '_validation_performed': True
-    }
-    t.get_action('resource_patch')(patch_context, data_dict)
-    send_validation_report(validation_dictize(validation))
-
+        '_validation_performed': True},
+        data_dict)
+    utils.send_validation_report(utils.validation_dictize(validation))
 
 
-
-def _validate_table(source, _format='csv', schema=None, **options):
+def _validate_table(source, _format=u'csv', schema=None, **options):
 
     # This option is needed to allow Frictionless Framework to validate absolute paths
     frictionless_context = { 'trusted': True }
     http_session = options.pop('http_session', None) or requests.Session()
-    use_proxy = 'ckan.download_proxy' in t.config
 
+    use_proxy = 'ckan.download_proxy' in t.config
     if use_proxy:
         proxy = t.config.get('ckan.download_proxy')
-        log.debug('Download resource for validation via proxy: %s', proxy)
+        log.debug(u'Download resource for validation via proxy: %s', proxy)
         http_session.proxies.update({'http': proxy, 'https': proxy})
 
     frictionless_context['http_session'] = http_session
@@ -168,14 +173,6 @@ def _validate_table(source, _format='csv', schema=None, **options):
 
     with system.use_context(**frictionless_context):
         report = validate(source, format=_format, schema=resource_schema, **options)
-        log.debug('Validating source: %s', source)
+        log.debug(u'Validating source: %s', source)
 
     return report
-
-
-def _get_site_user_api_key():
-
-    site_user_name = t.get_action('get_site_user')({'ignore_auth': True}, {})
-    site_user = t.get_action('get_site_user')(
-        {'ignore_auth': True}, {'id': site_user_name})
-    return site_user['apikey']
diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py
index a47648ce..2a92e4bc 100644
--- a/ckanext/validation/plugin.py
+++ b/ckanext/validation/plugin.py
@@ -6,12 +6,12 @@
 
 
 import ckan.plugins as p
-import ckantoolkit as t
+import ckantoolkit as tk
 
-from . import settings, utils, validators
+from . import settings as s, utils, validators
 from .helpers import _get_helpers
-from ckanext.validation.model import tables_exist
 from .logic import action, auth
+from .model import tables_exist
 
 from ckanext.validation.utils import (
     get_create_mode_from_config,
@@ -58,9 +58,9 @@ def update_config(self, config_):
         else:
             log.debug(u'Validation tables exist')
 
-        t.add_template_directory(config_, u'templates')
-        t.add_public_directory(config_, u'public')
-        t.add_resource(u'webassets', 'ckanext-validation')
+        tk.add_template_directory(config_, u'templates')
+        tk.add_public_directory(config_, u'public')
+        tk.add_resource(u'webassets', 'ckanext-validation')
 
     # IActions
 
@@ -141,7 +141,7 @@ def _handle_validation_for_resource(self, context, resource):
             ) and (
             # Make sure format is supported
             resource.get(u'format', u'').lower() in
-                settings.SUPPORTED_FORMATS
+                s.SUPPORTED_FORMATS
                 )):
             needs_validation = True
 
@@ -166,7 +166,7 @@ def before_resource_update(self, context, current_resource, updated_resource):
         # the call originates from a resource API, so don't validate the entire package
         package_id = updated_resource.get('package_id')
         if not package_id:
-            existing_resource = t.get_action('resource_show')(
+            existing_resource = tk.get_action('resource_show')(
                 context={'ignore_auth': True}, data_dict={'id': updated_resource['id']})
             if existing_resource:
                 package_id = existing_resource['package_id']
@@ -190,7 +190,7 @@ def before_resource_update(self, context, current_resource, updated_resource):
             ) and (
             # Make sure format is supported
             updated_resource.get(u'format', u'').lower() in
-                settings.SUPPORTED_FORMATS
+                s.SUPPORTED_FORMATS
                 )):
             needs_validation = True
 
@@ -287,4 +287,3 @@ def before_dataset_index(self, index_dict):
             index_dict['vocab_validation_status'] = res_status
 
         return index_dict
-
diff --git a/ckanext/validation/utils.py b/ckanext/validation/utils.py
index f5cdb9fe..4c0881a7 100644
--- a/ckanext/validation/utils.py
+++ b/ckanext/validation/utils.py
@@ -80,16 +80,6 @@ def _run_async_validation(resource_id):
                 resource_id, e)
 
 
-def _should_remove_unsupported_resource_validation_reports(res_dict):
-    if not t.h.asbool(t.config.get('ckanext.validation.clean_validation_reports', False)):
-        return False
-    return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS
-            and (res_dict.get('url_type') == 'upload'
-                or not res_dict.get('url_type'))
-            and (t.h.asbool(res_dict.get('validation_status', False))
-                or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False))))
-
-
 def _remove_unsupported_resource_validation_reports(resource_id):
     """
     Callback to remove unsupported validation reports.

From 47476bd9ea7eb9573874de07214fdd8e03411c56 Mon Sep 17 00:00:00 2001
From: William Dutton <william.dutton@qld.gov.au>
Date: Thu, 12 Dec 2024 08:16:48 +1000
Subject: [PATCH 8/8] chore: rename helpers._get_helpers() to get_helpers()

---
 ckanext/validation/helpers.py | 2 +-
 ckanext/validation/plugin.py  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py
index aa36572b..5192ac89 100644
--- a/ckanext/validation/helpers.py
+++ b/ckanext/validation/helpers.py
@@ -4,7 +4,7 @@
 from ckan.lib.helpers import url_for_static
 from ckantoolkit import url_for, _, config, asbool, literal, h
 
-def _get_helpers():
+def get_helpers():
     validators = (
         get_validation_badge,
         validation_extract_report_from_errors,
diff --git a/ckanext/validation/plugin.py b/ckanext/validation/plugin.py
index 2a92e4bc..af5fd081 100644
--- a/ckanext/validation/plugin.py
+++ b/ckanext/validation/plugin.py
@@ -9,7 +9,7 @@
 import ckantoolkit as tk
 
 from . import settings as s, utils, validators
-from .helpers import _get_helpers
+from .helpers import get_helpers
 from .logic import action, auth
 from .model import tables_exist
 
@@ -75,7 +75,7 @@ def get_auth_functions(self):
     # ITemplateHelpers
 
     def get_helpers(self):
-        return _get_helpers()
+        return get_helpers()
 
     # IValidators