diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..55f2a01d --- /dev/null +++ b/.flake8 @@ -0,0 +1,21 @@ +[flake8] +# @see https://flake8.pycqa.org/en/latest/user/configuration.html?highlight=.flake8 + +exclude = + ckan + +# Extended output format. +format = pylint + +# Show the source of errors. +show_source = True +statistics = True + +max-complexity = 10 +max-line-length = 127 + +# List ignore rules one per line. +ignore = + C901 + E501 + W503 diff --git a/ckanext/validation/commands.py b/ckanext/validation/commands.py index 4cb1ba69..3c30bba0 100644 --- a/ckanext/validation/commands.py +++ b/ckanext/validation/commands.py @@ -111,7 +111,6 @@ def __init__(self, name): help='''Location of the CSV validation report file on the relevant commands.''') - _page_size = 100 def command(self): @@ -162,8 +161,8 @@ def run_validation(self): elif not self.options.assume_yes: - msg = ('\nYou are about to start validation for {0} datasets' + - '.\n Do you want to continue?') + msg = ('\nYou are about to start validation for {0} datasets' + + '.\n Do you want to continue?') confirm = query_yes_no(msg.format(query['count'])) @@ -186,8 +185,8 @@ def _run_validation_on_resource(self, resource_id, dataset_id): {u'resource_id': resource_id, u'async': True}) - msg = ('Resource {} from dataset {} sent to ' + - 'the validation queue') + msg = ('Resource {} from dataset {} sent to ' + + 'the validation queue') log.debug( msg.format(resource_id, dataset_id)) @@ -309,7 +308,7 @@ def report(self, full=False): outputs['resources_' + resource['validation_status']] += 1 if resource.get('validation_status') in ( - 'failure', 'error'): + 'failure', 'error'): if full: row_counts = self._process_row_full(dataset, resource, writer) if not row_counts: @@ -332,7 +331,6 @@ def report(self, full=False): else: outputs['formats_success'][resource['format']] = 1 - if len(query['results']) < self._page_size: break @@ -380,6 +378,4 @@ def report(self, full=False): {msg_errors} CSV Report stored in {output_csv} '''.format(**outputs) - - log.info(msg) diff --git a/ckanext/validation/helpers.py b/ckanext/validation/helpers.py index b6c856df..59c4f856 100644 --- a/ckanext/validation/helpers.py +++ b/ckanext/validation/helpers.py @@ -72,9 +72,11 @@ def validation_extract_report_from_errors(errors): return report, errors + def validation_dict(validation_json): return json.loads(validation_json) + def dump_json_value(value, indent=None): """ Returns the object passed serialized as a JSON string. 
diff --git a/ckanext/validation/jobs.py b/ckanext/validation/jobs.py index f0d658ec..15a41175 100644 --- a/ckanext/validation/jobs.py +++ b/ckanext/validation/jobs.py @@ -93,7 +93,7 @@ def run_validation_job(resource): report = _validate_table(source, _format=_format, schema=schema, **options) # Hide uploaded files - if type(report) == Report: + if isinstance(report, Report): report = report.to_dict() if 'tasks' in report: @@ -109,7 +109,7 @@ def run_validation_job(resource): validation.report = json.dumps(report) else: validation.report = json.dumps(report) - if 'errors' in report and report['errors']: + if 'errors' in report and report['errors']: validation.status = 'error' validation.error = { 'message': [str(err) for err in report['errors']]} @@ -139,12 +139,10 @@ def run_validation_job(resource): send_validation_report(validation_dictize(validation)) - - def _validate_table(source, _format='csv', schema=None, **options): # This option is needed to allow Frictionless Framework to validate absolute paths - frictionless_context = { 'trusted': True } + frictionless_context = {'trusted': True} http_session = options.pop('http_session', None) or requests.Session() use_proxy = 'ckan.download_proxy' in t.config diff --git a/ckanext/validation/logic.py b/ckanext/validation/logic.py index f5400d45..401d2eb4 100644 --- a/ckanext/validation/logic.py +++ b/ckanext/validation/logic.py @@ -97,9 +97,8 @@ def resource_validation_run(context, data_dict): # Ensure format is supported if not resource.get(u'format', u'').lower() in settings.SUPPORTED_FORMATS: raise t.ValidationError( - {u'format': u'Unsupported resource format.' + - u'Must be one of {}'.format( - u','.join(settings.SUPPORTED_FORMATS))}) + {u'format': u'Unsupported resource format. ' + + u'Must be one of {}'.format(u','.join(settings.SUPPORTED_FORMATS))}) # Ensure there is a URL or file upload if not resource.get(u'url') and not resource.get(u'url_type') == u'upload': @@ -268,15 +267,15 @@ def resource_validation_run_batch(context, data_dict): if isinstance(dataset_ids, str): try: dataset_ids = json.loads(dataset_ids) - except ValueError as e: + except ValueError: dataset_ids = [dataset_ids] search_params = data_dict.get('query') if isinstance(search_params, str): try: search_params = json.loads(search_params) - except ValueError as e: - msg = 'Error parsing search parameters'.format(search_params) + except ValueError: + msg = 'Error parsing search parameters {0}'.format(search_params) return {'output': msg} while True: @@ -312,9 +311,9 @@ def resource_validation_run_batch(context, data_dict): except t.ValidationError as e: log.warning( - u'Could not run validation for resource %s ' + - u'from dataset %s: %s', - resource['id'], dataset['name'], e) + u'Could not run validation for resource %s ' + + u'from dataset %s: %s', + resource['id'], dataset['name'], e) if len(query['results']) < page_size: break @@ -390,8 +389,8 @@ def _update_search_params(search_data_dict, user_search_params=None): else: search_data_dict['fq'] = user_search_params['fq'] - if (user_search_params.get('fq_list') and - isinstance(user_search_params['fq_list'], list)): + if (user_search_params.get('fq_list') + and isinstance(user_search_params['fq_list'], list)): search_data_dict['fq_list'].extend(user_search_params['fq_list']) @@ -435,7 +434,7 @@ def resource_create(up_func, context, data_dict): {'id': package_id}) t.check_access('resource_create', context, data_dict) - + # Check if CKAN version is min 2.10 if ckan_2_10: for plugin in 
plugins.PluginImplementations(plugins.IResourceController): @@ -487,9 +486,9 @@ def resource_create(up_func, context, data_dict): if run_validation: is_local_upload = ( - hasattr(upload, 'filename') and - upload.filename is not None and - isinstance(upload, uploader.ResourceUpload)) + hasattr(upload, 'filename') + and upload.filename is not None + and isinstance(upload, uploader.ResourceUpload)) _run_sync_validation( resource_id, local_upload=is_local_upload, new_resource=True) @@ -511,7 +510,7 @@ def resource_create(up_func, context, data_dict): {'resource': resource, 'package': updated_pkg_dict }) - + if ckan_2_10: for plugin in plugins.PluginImplementations(plugins.IResourceController): plugin.after_resource_create(context, resource) @@ -568,10 +567,10 @@ def resource_update(up_func, context, data_dict): raise t.ObjectNotFound(t._('Resource was not found.')) # Persist the datastore_active extra if already present and not provided - if ('datastore_active' in resource.extras and - 'datastore_active' not in data_dict): + if ('datastore_active' in resource.extras + and 'datastore_active' not in data_dict): data_dict['datastore_active'] = resource.extras['datastore_active'] - + if ckan_2_10: for plugin in plugins.PluginImplementations(plugins.IResourceController): plugin.before_resource_update(context, pkg_dict['resources'][n], data_dict) @@ -617,9 +616,9 @@ def resource_update(up_func, context, data_dict): if run_validation: is_local_upload = ( - hasattr(upload, 'filename') and - upload.filename is not None and - isinstance(upload, uploader.ResourceUpload)) + hasattr(upload, 'filename') + and upload.filename is not None + and isinstance(upload, uploader.ResourceUpload)) _run_sync_validation( id, local_upload=is_local_upload, new_resource=False) @@ -656,7 +655,7 @@ def _run_sync_validation(resource_id, local_upload=False, new_resource=True): except t.ValidationError as e: log.info( u'Could not run validation for resource %s: %s', - resource_id, e) + resource_id, e) return validation = t.get_action(u'resource_validation_show')( diff --git a/ckanext/validation/plugin/__init__.py b/ckanext/validation/plugin/__init__.py index b435d147..31a91030 100644 --- a/ckanext/validation/plugin/__init__.py +++ b/ckanext/validation/plugin/__init__.py @@ -146,8 +146,8 @@ def _process_schema_fields(self, data_dict): data_dict["schema"] = data_dict["schema"].decode() elif schema_url: - if (not isinstance(schema_url, str) or - not schema_url.lower()[:4] == u'http'): + if (not isinstance(schema_url, str) + or not schema_url.lower()[:4] == u'http'): raise t.ValidationError({u'schema_url': 'Must be a valid URL'}) data_dict[u'schema'] = schema_url elif schema_json: @@ -159,7 +159,7 @@ def before_resource_create(self, context, data_dict): context["_resource_create_call"] = True return self._process_schema_fields(data_dict) - + def before_create(self, context, data_dict): if not self._data_dict_is_dataset(data_dict): @@ -190,16 +190,18 @@ def _data_dict_is_dataset(self, data_dict): def _handle_validation_for_resource(self, context, resource): needs_validation = False - if (( - # File uploaded - resource.get(u'url_type') == u'upload' or - # URL defined - resource.get(u'url') - ) and ( - # Make sure format is supported - resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS - )): + if ( + ( + # File uploaded + resource.get(u'url_type') == u'upload' + # URL defined + or resource.get(u'url') + ) and ( + # Make sure format is supported + resource.get(u'format', u'').lower() in + settings.SUPPORTED_FORMATS + 
) + ): needs_validation = True if needs_validation: @@ -228,22 +230,21 @@ def before_update(self, context, current_resource, updated_resource): return updated_resource needs_validation = False - if (( - # New file uploaded - updated_resource.get(u'upload') or - # External URL changed - updated_resource.get(u'url') != current_resource.get(u'url') or - # Schema changed - (updated_resource.get(u'schema') != - current_resource.get(u'schema')) or - # Format changed - (updated_resource.get(u'format', u'').lower() != - current_resource.get(u'format', u'').lower()) - ) and ( - # Make sure format is supported - updated_resource.get(u'format', u'').lower() in - settings.SUPPORTED_FORMATS - )): + if ( + ( + # New file uploaded + updated_resource.get(u'upload') + # External URL changed + or updated_resource.get(u'url') != current_resource.get(u'url') + # Schema changed + or updated_resource.get(u'schema') != current_resource.get(u'schema') + # Format changed + or updated_resource.get(u'format', u'').lower() != current_resource.get(u'format', u'').lower() + ) and ( + # Make sure format is supported + updated_resource.get(u'format', u'').lower() in settings.SUPPORTED_FORMATS + ) + ): needs_validation = True if needs_validation: @@ -307,19 +308,18 @@ def after_update(self, context, data_dict): del self.resources_to_validate[resource_id] _run_async_validation(resource_id) - + def after_dataset_create(self, context, data_dict): self.after_create(context, data_dict) - + def before_resource_update(self, context, current_resource, updated_resource): self.before_update(context, current_resource, updated_resource) def after_dataset_update(self, context, data_dict): self.after_update(context, data_dict) - - if _should_remove_unsupported_resource_validation_reports(data_dict): - p.toolkit.enqueue_job(fn=_remove_unsupported_resource_validation_reports, args=[resource_id]) + if _should_remove_unsupported_resource_validation_reports(data_dict): + p.toolkit.enqueue_job(fn=_remove_unsupported_resource_validation_reports, args=[data_dict[u'id']]) # IPackageController @@ -355,7 +355,8 @@ def _run_async_validation(resource_id): except t.ValidationError as e: log.warning( u'Could not run validation for resource %s: %s', - resource_id, e) + resource_id, e) + def _get_underlying_file(wrapper): if isinstance(wrapper, FlaskFileStorage): @@ -368,9 +369,9 @@ def _should_remove_unsupported_resource_validation_reports(res_dict): return False return (not res_dict.get('format', u'').lower() in settings.SUPPORTED_FORMATS and (res_dict.get('url_type') == 'upload' - or not res_dict.get('url_type')) + or not res_dict.get('url_type')) and (t.h.asbool(res_dict.get('validation_status', False)) - or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) + or t.h.asbool(res_dict.get('extras', {}).get('validation_status', False)))) def _remove_unsupported_resource_validation_reports(resource_id): @@ -389,7 +390,7 @@ def _remove_unsupported_resource_validation_reports(resource_id): if _should_remove_unsupported_resource_validation_reports(res): log.info('Unsupported resource format "%s". 
Deleting validation reports for resource %s', - res.get(u'format', u''), res['id']) + res.get(u'format', u''), res['id']) try: p.toolkit.get_action('resource_validation_delete')(context, { "resource_id": res['id']}) diff --git a/ckanext/validation/tests/fixtures.py b/ckanext/validation/tests/fixtures.py index 3e1704f7..df867281 100644 --- a/ckanext/validation/tests/fixtures.py +++ b/ckanext/validation/tests/fixtures.py @@ -3,8 +3,6 @@ from ckan.lib import uploader from ckanext.validation.model import create_tables, tables_exist -import ckantoolkit as t - @pytest.fixture def validation_setup(): diff --git a/ckanext/validation/tests/helpers.py b/ckanext/validation/tests/helpers.py index 4922f044..47ccfa22 100644 --- a/ckanext/validation/tests/helpers.py +++ b/ckanext/validation/tests/helpers.py @@ -1,5 +1,4 @@ import builtins -import cgi import functools from unittest import mock from io import BytesIO @@ -8,7 +7,6 @@ from pyfakefs import fake_filesystem import ckan.lib.uploader -from ckan.plugins import toolkit from ckan.tests.helpers import change_config @@ -146,6 +144,7 @@ def wrapper(*args, **kwargs): class MockFieldStorage(FileStorage): pass + def get_mock_file(contents): mock_file = BytesIO() mock_file.write(contents.encode('utf8')) diff --git a/ckanext/validation/tests/test_form.py b/ckanext/validation/tests/test_form.py index 8d443cb2..f589a85f 100644 --- a/ckanext/validation/tests/test_form.py +++ b/ckanext/validation/tests/test_form.py @@ -1,6 +1,5 @@ import json import io -from unittest import mock import datetime import pytest @@ -8,7 +7,7 @@ import ckantoolkit as t from ckantoolkit.tests.factories import Sysadmin, Dataset from ckantoolkit.tests.helpers import ( - call_action, _get_test_app + call_action ) from ckanext.validation.tests.helpers import VALID_CSV, INVALID_CSV @@ -255,7 +254,7 @@ def test_resource_form_update_upload(self, app): value = {"fields": [{"name": "code"}, {"name": "department"}, {"name": "date"}]} json_value = bytes(json.dumps(value).encode('utf8')) - upload = ('schema_upload', 'schema.json', json_value) + ('schema_upload', 'schema.json', json_value) data = { "url": "https://example.com/data.csv", @@ -451,7 +450,7 @@ def test_resource_form_update_invalid(self, app): user = Sysadmin() env = {"REMOTE_USER": user["name"].encode("ascii")} - dataset2 = call_action("package_show", id=dataset["id"]) + call_action("package_show", id=dataset["id"]) response = app.post( url=_edit_resource_url(dataset['id'], dataset['resources'][0]['id']), extra_environ=env, diff --git a/ckanext/validation/tests/test_interfaces.py b/ckanext/validation/tests/test_interfaces.py index cadca410..c6e930c4 100644 --- a/ckanext/validation/tests/test_interfaces.py +++ b/ckanext/validation/tests/test_interfaces.py @@ -4,8 +4,6 @@ from ckan import plugins as p from ckan.tests import helpers, factories -import ckantoolkit as t - from ckanext.validation.interfaces import IDataValidation from ckanext.validation.tests.helpers import VALID_REPORT @@ -40,14 +38,13 @@ def setup(self): for plugin in p.PluginImplementations(IDataValidation): return plugin.reset_counter() + @pytest.fixture def reset_counter(): for plugin in p.PluginImplementations(IDataValidation): return plugin.reset_counter() - - @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins", "reset_counter") @pytest.mark.ckan_config("ckan.plugins", "validation test_validation_plugin scheming_datasets") class TestInterfaceSync(): @@ -130,6 +127,7 @@ def test_can_validate_called_on_update_sync_no_validation(self, 
mock_validation) assert not mock_validation.called + @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins", "reset_counter") @pytest.mark.ckan_config("ckan.plugins", "validation test_validation_plugin scheming_datasets") @pytest.mark.ckan_config('ckanext.validation.run_on_create_sync', False) diff --git a/ckanext/validation/tests/test_jobs.py b/ckanext/validation/tests/test_jobs.py index 8e770eb0..f0d28efb 100644 --- a/ckanext/validation/tests/test_jobs.py +++ b/ckanext/validation/tests/test_jobs.py @@ -9,7 +9,7 @@ from ckan.tests.helpers import call_action from ckan.tests import factories -from ckanext.validation.model import create_tables, tables_exist, Validation +from ckanext.validation.model import Validation from ckanext.validation.jobs import run_validation_job, uploader, Session from ckanext.validation.tests.helpers import ( VALID_REPORT, diff --git a/ckanext/validation/tests/test_logic.py b/ckanext/validation/tests/test_logic.py index 1271a152..4ee7d58d 100644 --- a/ckanext/validation/tests/test_logic.py +++ b/ckanext/validation/tests/test_logic.py @@ -199,7 +199,6 @@ def test_resource_validation_only_called_on_resource_updated( assert mock_enqueue_job.call_args[0][1][0]["id"] == resource_1_id - @pytest.mark.usefixtures("clean_db", "validation_setup", "with_plugins") class TestResourceValidationShow(object): def test_resource_validation_show_param_missing(self): @@ -542,7 +541,7 @@ def test_validation_fails_no_validation_object_stored(self): dataset = factories.Dataset() - invalid_stream = io.BufferedReader(io.BytesIO(INVALID_CSV.encode('utf8'))) + io.BufferedReader(io.BytesIO(INVALID_CSV.encode('utf8'))) validation_count_before = model.Session.query(Validation).count() diff --git a/ckanext/validation/tests/test_plugin.py b/ckanext/validation/tests/test_plugin.py index 866db6c0..9a67f16d 100644 --- a/ckanext/validation/tests/test_plugin.py +++ b/ckanext/validation/tests/test_plugin.py @@ -1,10 +1,9 @@ import pytest from unittest import mock -from ckan.tests.helpers import call_action, reset_db +from ckan.tests.helpers import call_action from ckan.tests import factories -from ckanext.validation.model import create_tables, tables_exist from ckanext.validation.jobs import run_validation_job