diff --git a/MANIFEST.in b/MANIFEST.in index 49bc7b31..fb13caf9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,5 +10,6 @@ recursive-include tests * recursive-include etc * recursive-exclude * __pycache__ recursive-exclude * *.py[co] +recursive-include etc * recursive-include docs *.md conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/dachar/analyse/checks/_base_check.py b/dachar/analyse/checks/_base_check.py index 460ac013..cfb15a1d 100644 --- a/dachar/analyse/checks/_base_check.py +++ b/dachar/analyse/checks/_base_check.py @@ -1,13 +1,13 @@ -import logging import pprint as pp -from dachar.utils import UNDEFINED, nested_lookup, JDict - -from dachar.utils.get_stores import get_fix_prop_store, get_dc_store - -from dachar.fixes.fix_api import get_fix - +from dachar import __version__ as version from dachar import logging +from dachar.fixes.fix_api import get_fix +from dachar.utils import JDict +from dachar.utils import nested_lookup +from dachar.utils import UNDEFINED +from dachar.utils.get_stores import get_dc_store +from dachar.utils.get_stores import get_fix_prop_store LOGGER = logging.getLogger(__file__) @@ -53,6 +53,13 @@ class _BaseCheck(object): typical_threshold = 0.41 atypical_threshold = 0.15 + source = { + "name": "dachar", + "version": f"{version}", + "comment": "No specific source provided - link to all fixes in dachar", + "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes", + } + def __init__(self, sample): self.sample = sample self._load() @@ -124,12 +131,10 @@ def _extract_content(self): content = [] for ds_id in self.sample: - items = dict( - [ - (key, nested_lookup(key, self._cache[ds_id], must_exist=True)) - for key in self.characteristics - ] - ) + items = { + key: nested_lookup(key, self._cache[ds_id], must_exist=True) + for key in self.characteristics + } content.append((ds_id, items)) return content @@ -144,5 +149,3 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): def _propose_fix(self, ds_id, fix): get_fix_prop_store().propose(ds_id, fix) - - diff --git a/dachar/analyse/checks/coord_checks.py b/dachar/analyse/checks/coord_checks.py index f66612ae..0f3c9146 100644 --- a/dachar/analyse/checks/coord_checks.py +++ b/dachar/analyse/checks/coord_checks.py @@ -1,17 +1,29 @@ +import pprint +from collections import Counter +from collections import namedtuple +from itertools import chain + +from scipy.stats import mode + +from dachar import __version__ as version from dachar.analyse.checks._base_check import _BaseCheck from dachar.fixes.fix_api import get_fix -from dachar.utils.common import get_extra_items_in_larger_sequence, coord_mappings from dachar.utils import nested_lookup -from scipy.stats import mode -from collections import Counter, namedtuple -from itertools import chain -import pprint +from dachar.utils.common import coord_mappings +from dachar.utils.common import get_extra_items_in_larger_sequence class RankCheck(_BaseCheck): characteristics = ["data.dim_names", "data.shape"] associated_fix = "SqueezeDimensionsFix" + source = { + "name": "dachar", + "version": f"{version}", + "comment": "", + "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L8", + } + def deduce_fix(self, ds_id, atypical_content, typical_content): dicts = [] @@ -38,7 +50,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): operands = {"dims": extra_coords} - fix = fix_cls(ds_id, **operands) + fix = fix_cls(ds_id, source=self.source, **operands) d = fix.to_dict() dicts.append(d) return dicts @@ -51,10 +63,18 @@ 
def deduce_fix(self, ds_id, atypical_content, typical_content): # TODO: Need to change this so that characteristic compared is coord_type # But characteristic used to create new variable is id + class MissingCoordCheck(_BaseCheck): characteristics = ["coordinates.*.id"] associated_fix = "AddScalarCoordFix" + source = { + "name": "dachar", + "version": f"{version}", + "comment": "", + "url": "https://github.com/roocs/dachar/blob/master/dachar/fixes/coord_fixes.py#L44", + } + def deduce_fix(self, ds_id, atypical_content, typical_content): dicts = [] @@ -112,7 +132,7 @@ def deduce_fix(self, ds_id, atypical_content, typical_content): operands["attrs"] = operand_dict - fix = fix_cls(ds_id, **operands) + fix = fix_cls(ds_id, source=self.source, **operands) d = fix.to_dict() # coordinate isn't scalar - fix isn't suitable diff --git a/dachar/cli.py b/dachar/cli.py index 7ca1b44b..0c9c2dc6 100644 --- a/dachar/cli.py +++ b/dachar/cli.py @@ -2,8 +2,6 @@ import os import shutil -from dachar.utils._stores_for_tests import _TestFixProposalStore - """Console script for dachar.""" __author__ = """Elle Smith""" @@ -18,6 +16,10 @@ from dachar.analyse.sample_analyser import analyse from dachar.fixes import fix_processor from dachar.fixes.fix_processor import process_all_fixes +from dachar.fixes.generate_proposals import ( + generate_fix_proposals, + generate_proposal_from_template, +) from unittest.mock import Mock @@ -41,8 +43,10 @@ def _get_arg_parser_scan(parser): :return: Namespace object built from attributes parsed from command line. """ # parser = argparse.ArgumentParser() - project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')] - location_options = CONFIG['dachar:settings']['locations'] + project_options = [ + _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:") + ] + location_options = CONFIG["dachar:settings"]["locations"] parser.add_argument( "project", @@ -130,8 +134,10 @@ def scan_main(args): def _get_arg_parser_analyse(parser): - project_options = [_.split(':')[1] for _ in CONFIG.keys() if _.startswith('project:')] - location_options = CONFIG['dachar:settings']['locations'] + project_options = [ + _.split(":")[1] for _ in CONFIG.keys() if _.startswith("project:") + ] + location_options = CONFIG["dachar:settings"]["locations"] parser.add_argument( "project", @@ -200,7 +206,7 @@ def _get_arg_parser_process_fixes(parser): type=str, default=None, required=True, - help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw" + help="Action to carry out on fixes: process for proposed fixes, withdraw to withdraw " "existing fixes", ) @@ -218,6 +224,71 @@ def process_fixes_main(args): process_all_fixes(action, ds_ids) +def _get_arg_parser_propose_fixes(parser): + + parser.add_argument( + "-f", + "--files", + type=str, + default=None, + required=False, + help="List of comma-separated json files containing information to generate fix proposals. " + "This option must be used on its own", + ) + + parser.add_argument( + "-d", + "--dataset-list", + type=str, + default=None, + required=False, + help="Text file containing dataset ids for which to propose the fix provided in the template. " + "If using this option you must provide a template using --template (-t) option.", + ) + + parser.add_argument( + "-t", + "--template", + type=str, + default=None, + required=False, + help="Template for fix proposal. 
" + "If using this option you must provide a list of dataset ids using the --dataset-list (-d) option.", + ) + + return parser + + +def parse_args_propose_fixes(args): + + if args.files: + if args.dataset_list or args.template: + raise Exception( + "The file option must be used on its own. " + "A dataset list and a template must be provided together. " + ) + + if args.dataset_list and not args.template: + raise Exception("A dataset list and a template must be provided together.") + + if args.template and not args.dataset_list: + raise Exception("A dataset list and a template must be provided together.") + + files = _to_list(args.files) + ds_list = args.dataset_list + template = args.template + return files, ds_list, template + + +def propose_fixes_main(args): + files, ds_list, template = parse_args_propose_fixes(args) + + if files: + generate_fix_proposals(files) + elif ds_list and template: + generate_proposal_from_template(template, ds_list) + + def main(): """Console script for dachar.""" main_parser = argparse.ArgumentParser() @@ -235,9 +306,13 @@ def main(): _get_arg_parser_process_fixes(fix_parser) fix_parser.set_defaults(func=process_fixes_main) + fix_proposal_parser = subparsers.add_parser("propose-fixes") + _get_arg_parser_propose_fixes(fix_proposal_parser) + fix_proposal_parser.set_defaults(func=propose_fixes_main) + args = main_parser.parse_args() args.func(args) if __name__ == "__main__": - sys.exit(main()) # pragma: no cover + sys.exit(main()) diff --git a/dachar/etc/roocs.ini b/dachar/etc/roocs.ini new file mode 100644 index 00000000..f38fe615 --- /dev/null +++ b/dachar/etc/roocs.ini @@ -0,0 +1,44 @@ +[config_data_types] +extra_lists = locations common cmip5 cmip6 cordex +extra_ints = memory_large memory_small dir_grouping_level +extra_floats = concern_threshold + +[dachar:processing] +queue = short-serial +wallclock_large = 23:59 +memory_large = 32000 +wallclock_small = 04:00 +memory_small = 4000 + +[dachar:output_paths] +# reformat these +_base_path = ./outputs +base_log_dir = %(_base_path)s/logs +batch_output_path = %(base_log_dir)s/batch-outputs/{grouped_ds_id} +json_output_path = %(_base_path)s/register/{grouped_ds_id}.json +success_path = %(base_log_dir)s/success/{grouped_ds_id}.log +no_files_path = %(base_log_dir)s/failure/no_files/{grouped_ds_id}.log +pre_extract_error_path = %(base_log_dir)s/failure/pre_extract_error/{grouped_ds_id}.log +extract_error_path = %(base_log_dir)s/failure/extract_error/{grouped_ds_id}.log +write_error_path = %(base_log_dir)s/failure/write_error/{grouped_ds_id}.log +fix_path = %(_base_path)s/fixes/{grouped_ds_id}.json + + +[dachar:checks] +common = coord_checks.RankCheck coord_checks.MissingCoordCheck +cmip5 = +cmip6 = test +cordex = + + +[dachar:settings] +elastic_api_token = +dir_grouping_level = 4 +concern_threshold = 0.2 +locations = ceda dkrz other + + +# From old options file - do we still need these? 
+# dataset_id = cordex.%(product)s.%(domain)s.%(institute)s.%(driving_model)s.%(experiment)s.%(ensemble)s.%(rcm_name)s.%(rcm_version)s.%(time_frequency)s.%(variable)s + +# directory_format = %(root)s/%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(rcm_model)s/%(rcm_version)s/%(time_frequency)s/%(variable)s/%(version)s \ No newline at end of file diff --git a/dachar/fixes/__init__.py b/dachar/fixes/__init__.py index 6ac66023..6d278e05 100644 --- a/dachar/fixes/__init__.py +++ b/dachar/fixes/__init__.py @@ -1,2 +1,8 @@ -from .fix_proposal_store import LocalFixProposalStore, ElasticFixProposalStore -from .fix_store import LocalFixStore, ElasticFixStore +from .array_fixes import * +from .attr_fixes import * +from .coord_fixes import * +from .fix_proposal_store import ElasticFixProposalStore +from .fix_proposal_store import LocalFixProposalStore +from .fix_store import ElasticFixStore +from .fix_store import LocalFixStore +from .var_fixes import * diff --git a/dachar/fixes/_base_fix.py b/dachar/fixes/_base_fix.py index 0226ac36..d07f4a52 100644 --- a/dachar/fixes/_base_fix.py +++ b/dachar/fixes/_base_fix.py @@ -1,5 +1,4 @@ from dachar.utils.common import UNDEFINED -from dachar import __version__ as version class FixDetails(object): @@ -51,7 +50,7 @@ class _BaseDatasetFix(object): """ - def __init__(self, ds_id, source=f"dachar version {version}", **operands): + def __init__(self, ds_id, source, **operands): self.ds_id = ds_id self.source = source self.operands = operands diff --git a/dachar/fixes/array_fixes.py b/dachar/fixes/array_fixes.py new file mode 100644 index 00000000..29724381 --- /dev/null +++ b/dachar/fixes/array_fixes.py @@ -0,0 +1,20 @@ +from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["MaskDataFix"] + + +class MaskDataFix(_BaseDatasetFix): + fix_id = "MaskDataFix" + title = "Apply Mask to Data" + description = """ +Masks data equal to a given value. + +For example: + - inputs: + - {'value': '1.0e33'} +""" + category = "array_fixes" + required_operands = ["value"] + ref_implementation = "daops.data_utils.array_utils.mask_data" + process_type = "post_processor" diff --git a/dachar/fixes/attr_fixes.py b/dachar/fixes/attr_fixes.py new file mode 100644 index 00000000..9b840955 --- /dev/null +++ b/dachar/fixes/attr_fixes.py @@ -0,0 +1,57 @@ +from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["MainVarAttrFix", "AttrFix"] + + +class MainVarAttrFix(_BaseDatasetFix): + fix_id = "MainVarAttrFix" + title = "Apply Fix to Attributes of Main Variable" + description = """ +"Applies metadata fix e.g. fixing standard name or adding missing standard name + for the main variable of the dataset. + +Takes a dictionary of fixes with each fix as a key and value pair with the attribute +as the key and what the value should be as the value. 
e.g.: + +{"long_name": "Dissolved Oxygen Concentration", +"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + +For example: + - inputs: + {"attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + }, +""" + category = "attr_fixes" + required_operands = ["attrs"] + ref_implementation = "daops.data_utils.attr_utils.fix_attr_main_var" + process_type = "post_processor" + + +class AttrFix(_BaseDatasetFix): + fix_id = "AttrFix" + title = "Apply Fix to Attributes of any Variable" + description = """ +"Applies metadata fix e.g. fixing standard name or adding missing standard name + for a given variable of the dataset. + +Takes a dictionary of fixes with each fix as a key and value pair with the attribute +as the key and what the value should be as the value. e.g.: + +{"long_name": "Dissolved Oxygen Concentration", +"standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + +For example: + - inputs: + {"var_id": "lev", + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + }, +""" + category = "attr_fixes" + required_operands = ["var_id", "attrs"] + ref_implementation = "daops.data_utils.attr_utils.fix_attr" + process_type = "post_processor" diff --git a/dachar/fixes/coord_fixes.py b/dachar/fixes/coord_fixes.py index 81afc319..776c9274 100644 --- a/dachar/fixes/coord_fixes.py +++ b/dachar/fixes/coord_fixes.py @@ -1,8 +1,7 @@ -from dachar.utils.common import UNDEFINED - from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED -__all__ = ["SqueezeDimensionsFix", "AddScalarCoordFix"] +__all__ = ["SqueezeDimensionsFix", "AddScalarCoordFix", "ReverseCoordFix"] class SqueezeDimensionsFix(_BaseDatasetFix): @@ -63,3 +62,25 @@ class AddScalarCoordFix(_BaseDatasetFix): required_operands = ["dtype", "id", "value", "length", "attrs", "coord_type"] ref_implementation = "daops.data_utils.coord_utils.add_scalar_coord" process_type = "post_processor" + + +class ReverseCoordFix(_BaseDatasetFix): + fix_id = "ReverseCoordFix" + title = "Reverse data of Coordinates" + description = """ +"Reverses the order of the data of a given coordinate + +Takes as an input the name of the coordinate/s to be reversed: + +For example: + - inputs: + {"coords": [ + "lat", + "lev" + ] + }, +""" + category = "coord_fixes" + required_operands = ["coords"] + ref_implementation = "daops.data_utils.coord_utils.reverse_coords" + process_type = "post_processor" diff --git a/dachar/fixes/fix_api.py b/dachar/fixes/fix_api.py index 22419ac6..1640c68c 100644 --- a/dachar/fixes/fix_api.py +++ b/dachar/fixes/fix_api.py @@ -1,8 +1,11 @@ -import os import glob +import os from pydoc import locate +import dachar.fixes.array_fixes as array_fixes +import dachar.fixes.attr_fixes as attr_fixes import dachar.fixes.coord_fixes as coord_fixes +import dachar.fixes.var_fixes as var_fixes def get_fix_modules(): @@ -27,7 +30,7 @@ def get_fix_dict(): def get_fix(fix_id): if fix_id: - fix_cls = locate(f"dachar.fixes.coord_fixes.{fix_id}") + fix_cls = locate(f"dachar.fixes.{fix_id}") return fix_cls diff --git a/dachar/fixes/fix_processor.py b/dachar/fixes/fix_processor.py index 1b1d36a4..a2db0680 100644 --- a/dachar/fixes/fix_processor.py +++ b/dachar/fixes/fix_processor.py @@ -1,6 +1,8 @@ -from dachar.utils.get_stores import get_fix_prop_store, get_fix_store import pprint +from 
dachar.utils.get_stores import get_fix_prop_store +from dachar.utils.get_stores import get_fix_store + def get_proposed_fixes(ds_ids=None): if ds_ids is None: @@ -9,9 +11,10 @@ def get_proposed_fixes(ds_ids=None): proposed_fixes = [] for ds_id in ds_ids: - proposed_fix = get_fix_prop_store().get_proposed_fix_by_id(ds_id) - if proposed_fix is not None: - proposed_fixes.append(proposed_fix) + proposed_fix_list = get_fix_prop_store().get_proposed_fix_by_id(ds_id) + if proposed_fix_list is not None: + for fix in proposed_fix_list: + proposed_fixes.append(fix) return proposed_fixes @@ -19,11 +22,12 @@ def get_proposed_fixes(ds_ids=None): def process_proposed_fixes(proposed_fixes): if len(proposed_fixes) > 0: for proposed_fix in proposed_fixes: - fix = proposed_fix["fixes"][0]["fix"] ds_id = proposed_fix["dataset_id"] + fix = proposed_fix["this_fix"]["fix"] # print fix so user can see what they are processing - pprint.pprint(proposed_fix) + pprint.pprint(ds_id) + pprint.pprint(fix) action = input("Enter action for proposed fix: ") @@ -41,7 +45,7 @@ def process_proposed_fixes(proposed_fixes): else: # print('[INFO] You have not selected an action for this fix.') - pass + continue else: raise Exception("No proposed fixes found.") @@ -61,7 +65,7 @@ def get_fixes_to_withdraw(ds_ids): def process_withdraw_fixes(existing_fixes): if len(existing_fixes) > 0: for existing_fix in existing_fixes: - fix = existing_fix["fixes"][0] + # fix = existing_fix["fixes"][0] ds_id = existing_fix["dataset_id"] # print fix so user can see what they are processing diff --git a/dachar/fixes/fix_proposal_store.py b/dachar/fixes/fix_proposal_store.py index 9a7a8a69..b920813f 100644 --- a/dachar/fixes/fix_proposal_store.py +++ b/dachar/fixes/fix_proposal_store.py @@ -1,8 +1,9 @@ from copy import deepcopy -from dachar.utils.common import now_string -from dachar.utils.json_store import _LocalBaseJsonStore, _ElasticSearchBaseJsonStore from dachar import CONFIG +from dachar.utils.common import now_string +from dachar.utils.json_store import _ElasticSearchBaseJsonStore +from dachar.utils.json_store import _LocalBaseJsonStore class BaseFixProposalStore(object): @@ -32,9 +33,9 @@ class BaseFixProposalStore(object): 'reason': '', 'status': 'proposed', 'timestamp': '2020-04-29T14:41:52'}]} - - - Are title, description, category, ref_implementation needed here? + + + Are title, description, category, ref_implementation needed here? Should ncml be in fix? 
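+
+    Note: get_proposed_fix_by_id() and get_proposed_fixes() return a list of
+    {'dataset_id': <ds_id>, 'this_fix': <fix container>} records, one per fix
+    whose status is 'proposed', rather than the whole stored document.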
""" @@ -60,7 +61,7 @@ def _update_fix_container(self, container, fix, status, reason=""): 0, { "status": container["status"], - "timestamp": container["timestamp"], + "timestamp": now_string(), "reason": container["reason"], }, ) @@ -106,11 +107,15 @@ def _action_fix(self, ds_id, fix, status, reason=""): def get_proposed_fix_by_id(self, ds_id): # go through fixes and return if status is proposed + proposed_fixes = [] + if self.exists(ds_id): content = self.get(ds_id) for this_fix in content["fixes"]: if this_fix["status"] == "proposed": - return content + proposed_fixes.append({"dataset_id": ds_id, "this_fix": this_fix}) + + return proposed_fixes def get_proposed_fixes(self): # go through fixes and return if status is proposed @@ -121,7 +126,9 @@ def get_proposed_fixes(self): content = self.get(ds_id) for this_fix in content["fixes"]: if this_fix["status"] == "proposed": - proposed_fixes.append(content) + proposed_fixes.append( + {"dataset_id": ds_id, "this_fix": this_fix} + ) return proposed_fixes @@ -146,7 +153,7 @@ class ElasticFixProposalStore(BaseFixProposalStore, _ElasticSearchBaseJsonStore) config = { "store_type": "elasticsearch", - "index": CONFIG['elasticsearch']["fix_proposal_store"], - "api_token": CONFIG['dachar:settings']['elastic_api_token'], + "index": CONFIG["elasticsearch"]["fix_proposal_store"], + "api_token": CONFIG["dachar:settings"]["elastic_api_token"], "id_type": "dataset_id", } diff --git a/dachar/fixes/generate_proposals.py b/dachar/fixes/generate_proposals.py new file mode 100644 index 00000000..b0ba67f3 --- /dev/null +++ b/dachar/fixes/generate_proposals.py @@ -0,0 +1,78 @@ +import json +from pydoc import locate + +from dachar.utils.get_stores import get_fix_prop_store + + +def flatten_proposal(d): + keys = [] + for k, v in d.items(): + keys.append(k) + if isinstance(v, list): + for k1, v1 in v[0].items(): + keys.append(k1) + if k1 == "source": + for k2, v2 in v1.items(): + keys.append(k2) + return keys + + +def validate_proposal(proposal): + required = [ + "dataset_id", + "fixes", + "fix_id", + "operands", + "source", + "name", + "version", + "comments", + "url", + ] + + existing = flatten_proposal(proposal) + + missing = set(required).difference(set(existing)) + invalid = set(existing).difference(set(required)) + + if missing: + raise KeyError(f"Required fields not provided: {missing}") + + if invalid: + raise KeyError(f"Invalid fields provided: {invalid}") + + +def generate_fix_proposals(files): + for file in files: + if isinstance(file, dict): + proposal = file + else: + with open(file) as f: + proposal = json.load(f) + + validate_proposal(proposal) + ds_id = proposal.get("dataset_id") + for prop in proposal.get("fixes"): + fix_id = prop.get("fix_id") + fix_cls = locate(f"dachar.fixes.{fix_id}") + + source, operands = prop.get("source"), prop.get("operands") + + fix = fix_cls(ds_id, source=source, **operands) + d = fix.to_dict() + get_fix_prop_store().propose(d["dataset_id"]["ds_id"], d["fix"]) + + +def generate_proposal_from_template(template, ds_list): + with open(template) as f: + proposal_template = json.load(f) + + proposals = [] + with open(ds_list, "r") as f1: + for line in f1: + ds_id = line.strip() + proposal_template = proposal_template.copy() + proposal_template["dataset_id"] = ds_id + proposals.append(proposal_template) + + generate_fix_proposals(proposals) diff --git a/dachar/fixes/var_fixes.py b/dachar/fixes/var_fixes.py new file mode 100644 index 00000000..44e6c912 --- /dev/null +++ b/dachar/fixes/var_fixes.py @@ -0,0 +1,25 @@ +from 
dachar.fixes._base_fix import _BaseDatasetFix +from dachar.utils.common import UNDEFINED + +__all__ = ["Reverse2DVarFix"] + + +class Reverse2DVarFix(_BaseDatasetFix): + fix_id = "Reverse2DVarFix" + title = "Reverse data of 2D Variables" + description = """ +"Reverses the order of the data of the given 2d variables + +Takes as an input the names of the variables to be reversed +as a list: + +For example: + - inputs: + { + "var_ids": ["a_bnds", "b_bnds"] + }, +""" + category = "var_fixes" + required_operands = ["var_ids"] + ref_implementation = "daops.data_utils.var_utils.reverse_2d_vars" + process_type = "post_processor" diff --git a/dachar/utils/create_index.py b/dachar/utils/create_index.py index ffa22676..5a6fc3b4 100644 --- a/dachar/utils/create_index.py +++ b/dachar/utils/create_index.py @@ -1,63 +1,118 @@ """ -Currently this script produces a index with today's date and creates an alias for it. -There is a function to populate the elasticsearch store with the contents of the local store -""" +This script can produce an index with today's date and update the alias to point to it. +There is also a function to populate the elasticsearch store with the contents of the local store. -import sys -import os -import pathlib +When updating the index: +- new index must be created with new date - clone_index_and_update_alias function creates this, fills with all documents from old index and updates the alias to point to it +- it can then be populated either with all documents in local store (populate_store) or one document at a time (add_document_to_index) +""" import hashlib import json +import os +import pathlib +import sys from datetime import datetime -from elasticsearch import Elasticsearch -from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient -from dachar.config import ELASTIC_API_TOKEN -from dachar import CONFIG -from dachar.utils.get_stores import ( - get_fix_store, - get_fix_prop_store, - get_dc_store, - get_ar_store, -) +from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient +from elasticsearch import Elasticsearch -from tests._stores_for_tests import ( - _TestFixProposalStore, - _TestFixStore, - _TestAnalysisStore, - _TestDatasetCharacterStore, +from dachar import CONFIG +from dachar.utils.get_stores import get_ar_store +from dachar.utils.get_stores import get_dc_store +from dachar.utils.get_stores import get_fix_prop_store +from dachar.utils.get_stores import get_fix_store + +# from tests._stores_for_tests import ( +# _TestFixProposalStore, +# _TestFixStore, +# _TestAnalysisStore, +# _TestDatasetCharacterStore, +# ) + +es = CEDAElasticsearchClient( + headers={"x-api-key": CONFIG["dachar:settings"]["elastic_api_token"]} ) -es = CEDAElasticsearchClient(headers={"x-api-key": ELASTIC_API_TOKEN}) - -# es.indices.delete(index="roocs-char-test", ignore=[400, 404]) +# es.indices.delete(index="roocs-fix-2020-10-12", ignore=[400, 404]) # print(es.indices.exists("roocs-char-test")) # es.indices.create("roocs-char-test") -date = datetime.today().strftime("%Y-%m-%d") +# date = datetime.today().strftime("%Y-%m-%d") # character store -char_name = CONFIG['elasticsearch']["character_store"] +char_name = CONFIG["elasticsearch"]["character_store"] # analysis store -a_name = CONFIG['elasticsearch']["analysis_store"] +a_name = CONFIG["elasticsearch"]["analysis_store"] # fix store -fix_name = CONFIG['elasticsearch']["fix_store"] +fix_name = CONFIG["elasticsearch"]["fix_store"] # fix proposal store -fix_prop_name = CONFIG['elasticsearch']["fix_proposal_store"] 
+fix_prop_name = CONFIG["elasticsearch"]["fix_proposal_store"] + +def create_index_and_alias(index_name, date): + """ + create an empty index and update the alias to point to it + """ -def create_index_and_alias(name): - exists = es.indices.exists(f"{name}-{date}") + exists = es.indices.exists(f"{index_name}-{date}") if not exists: - es.indices.create( - f"{name}-{date}" - ) # do I need to include a mapping - should be put in here - alias_exists = es.indices.exists_alias(name=f"{name}", index=f"{name}-{date}") + es.indices.create(f"{index_name}-{date}") + alias_exists = es.indices.exists_alias( + name=f"{index_name}", index=f"{index_name}-{date}" + ) if not alias_exists: - es.indices.put_alias(index=f"{name}-{date}", name=f"{name}") + es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": f"{index_name}", "index": "*"}}, + { + "add": { + "alias": f"{index_name}", + "index": f"{index_name}-{date}", + } + }, + ] + } + ) + # es.indices.put_alias(index=f"{name}-{date}", name=f"{name}") + + +def clone_index_and_update_alias(index_name, date, index_to_clone): + """ + clone an index and update the alias to point to the new index + """ + + exists = es.indices.exists(f"{index_name}-{date}") + if not exists: + es.indices.clone(index_to_clone, f"{index_name}-{date}") + alias_exists = es.indices.exists_alias( + name=f"{index_name}", index=f"{index_name}-{date}" + ) + if not alias_exists: + es.indices.update_aliases( + body={ + "actions": [ + {"remove": {"alias": f"{index_name}", "index": "*"}}, + { + "add": { + "alias": f"{index_name}", + "index": f"{index_name}-{date}", + } + }, + ] + } + ) + # es.indices.put_alias(index=f"{name}-{date}", name=f"{name}") def populate_store(local_store, index, id_type): + """ + Populates elasticsearch index from local store + :param local_store: local store object to populate from + :param index: Name of elasticsearch index to populate + :param id_type: what the id is called in the provided index i.e. either dataset_id (for fix, character and fix proposal store) or sample_id (for the analysis store) + """ + root = local_store.config.get( "local.base_dir" ) # change if wanting to use a test store @@ -74,20 +129,54 @@ def populate_store(local_store, index, id_type): print(drs) m = hashlib.md5() m.update(drs.encode("utf-8")) - id = m.hexdigest() + doc_id = m.hexdigest() doc = json.load(open(fpath)) # es.delete(index=index, id=id) - es.index(index=index, id=id, body=doc) + es.index(index=index, id=doc_id, body=doc) if id_type is not None: - es.update(index=index, id=id, body={"doc": {id_type: drs}}) + es.update(index=index, id=doc_id, body={"doc": {id_type: drs}}) + + +def add_document_to_index(fpath, drs, index, id_type): + """ + Add document to elasticsearch index. Uses given file path to json file and ds_id (drs). 
+ """ + + mapper = {"__ALL__": "*"} + for find_s, replace_s in mapper.items(): + drs = drs.replace(find_s, replace_s) + + print(drs) + m = hashlib.md5() + m.update(drs.encode("utf-8")) + doc_id = m.hexdigest() + doc = json.load(open(fpath)) + # es.delete(index=index, id=id) + print(doc) + + es.index(index=index, id=doc_id, body=doc) + if id_type is not None: + es.update(index=index, id=doc_id, body={"doc": {id_type: drs}}) def main(): # for store in [char_name, a_name, fix_name, fix_prop_name]: - # create_index_and_alias(store) - - populate_store(_TestDatasetCharacterStore(), "roocs-char-2020-07-08", "dataset_id") + # es.indices.delete(index="roocs-fix-2021-06-16", ignore=[400, 404]) + es.indices.delete(index="roocs-fix-prop-2021-06-16", ignore=[400, 404]) + # create_index_and_alias(fix_name, "2021-06-16") + create_index_and_alias(fix_prop_name, "2021-06-16") + # # clone_index_and_update_alias(fix_name, "2021-06-15", "roocs-fix-2020-10-12")) + + # populate_store(get_fix_store(), "roocs-fix-2021-06-16", "dataset_id") + populate_store(get_fix_prop_store(), "roocs-fix-prop-2021-06-16", "dataset_id") + # add_document_to_index( + # "/home/users/esmith88/roocs/dachar/tests/test_fixes/decadal_fixes/decadal.json", + # "CMIP6.DCPP.MOHC.HadGEM3-GC31-MM.dcppA-hindcast.s2004-r3i1p1f2.Amon.pr.gn.v20200417", + # "roocs-fix-2020-10-12", + # "dataset_id", + # ) + # add_document_to_index("/tmp/fix-store/cmip5/output1/INM/inmcm4/rcp45/mon/ocean/Omon.r1i1p1.latest.zostoga.json", "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", "roocs-fix-2021-06-16", "dataset_id") if __name__ == "__main__": diff --git a/dachar/utils/json_store.py b/dachar/utils/json_store.py index e51012da..d7072d2d 100644 --- a/dachar/utils/json_store.py +++ b/dachar/utils/json_store.py @@ -1,12 +1,14 @@ -import json import hashlib +import json import os +from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient +from elasticsearch import Elasticsearch +from elasticsearch import helpers from .common import nested_lookup -from elasticsearch import Elasticsearch, helpers -from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient -from dachar import CONFIG, logging +from dachar import CONFIG +from dachar import logging LOGGER = logging.getLogger(__file__) @@ -237,7 +239,7 @@ class _ElasticSearchBaseJsonStore(_BaseJsonStore): config = { "store_type": "elasticsearch", "index": "", - "api_token": CONFIG['dachar:settings']['elastic_api_token'], + "api_token": CONFIG["dachar:settings"]["elastic_api_token"], "id_type": "id", } @@ -288,7 +290,7 @@ def _save(self, id, content): self.es.index(index=self.config.get("index"), body=content, id=id) - self._map(drs_id, reverse=True) # + self._map(drs_id, reverse=True) self.es.update( index=self.config.get("index"), id=id, @@ -332,7 +334,7 @@ def _search_fields(self, fields, term, query_type): results.append(each["_source"]) # ensure there are no duplicates of the same result - return list(dict((v[self.config.get("id_type")], v) for v in results).values()) + return list({v[self.config.get("id_type")]: v for v in results}.values()) def _search_all(self, term): @@ -347,7 +349,7 @@ def _search_all(self, term): results.append(each["_source"]) # ensure there are no duplicates of the same result - return list(dict((v[self.config.get("id_type")], v) for v in results).values()) + return list({v[self.config.get("id_type")]: v for v in results}.values()) def _field_requirements(self, fields, term, query_type): @@ -361,14 +363,16 @@ def search(self, term, 
exact=False, match_ids=True, fields=None): if isinstance(term, float) or isinstance(term, int): exact = True - LOGGER.info(f"Must search for exact value when the search term is a number, " - f"Changing search to exact=True" + LOGGER.info( + f"Must search for exact value when the search term is a number, " + f"Changing search to exact=True" ) if isinstance(term, str) and " " in term and exact is False: - LOGGER.info(f"Ensure the case of your search term is correct as this type of " - f"search is case sensitive. If you are not sure of the correct case change " - f"your search term to a one word search or use exact=True." + LOGGER.info( + f"Ensure the case of your search term is correct as this type of " + f"search is case sensitive. If you are not sure of the correct case change " + f"your search term to a one word search or use exact=True." ) if match_ids is True and exact is True: diff --git a/test/data/test_file.nc b/test/data/test_file.nc new file mode 100644 index 00000000..0afe76ae Binary files /dev/null and b/test/data/test_file.nc differ diff --git a/test/data/test_file_2.nc b/test/data/test_file_2.nc new file mode 100644 index 00000000..14b14044 Binary files /dev/null and b/test/data/test_file_2.nc differ diff --git a/tests/test_analyse/test_sample_analyser.py b/tests/test_analyse/test_sample_analyser.py index d9fc92c1..0c932d04 100644 --- a/tests/test_analyse/test_sample_analyser.py +++ b/tests/test_analyse/test_sample_analyser.py @@ -92,5 +92,5 @@ def test_analyse(load_esgf_test_data): def teardown_module(): - # clear_stores() - pass + # pass + clear_stores() diff --git a/tests/test_checks/test_coord_checks/test_MissingCoordCheck.py b/tests/test_checks/test_coord_checks/test_MissingCoordCheck.py index 0df66a58..0dc52173 100644 --- a/tests/test_checks/test_coord_checks/test_MissingCoordCheck.py +++ b/tests/test_checks/test_coord_checks/test_MissingCoordCheck.py @@ -1,14 +1,17 @@ -from dachar.analyse.checks.coord_checks import * import os import shutil +from unittest.mock import Mock + import pytest -from tests._stores_for_tests import _TestDatasetCharacterStore, _TestFixProposalStore -from dachar.scan.scan import scan_dataset, get_dataset_paths -from dachar.analyse.checks import _base_check from dachar import CONFIG -from unittest.mock import Mock +from dachar.analyse.checks import _base_check +from dachar.analyse.checks.coord_checks import MissingCoordCheck from dachar.scan import scan +from dachar.scan.scan import get_dataset_paths +from dachar.scan.scan import scan_dataset +from tests._stores_for_tests import _TestDatasetCharacterStore +from tests._stores_for_tests import _TestFixProposalStore char_store = None prop_store = None @@ -48,7 +51,7 @@ def populate_dc_store(ds_ids, project): scan.get_dc_store = Mock(return_value=char_store) ds_paths = get_dataset_paths( - project, ds_ids=ds_ids, paths=CONFIG[f'project:{project}']['base_dir'] + project, ds_ids=ds_ids, paths=CONFIG[f"project:{project}"]["base_dir"] ) for ds_id, ds_path in ds_paths.items(): scan_dataset(project, ds_id, ds_path, "full", "ceda") @@ -121,8 +124,8 @@ def test_MissingCoordCheck_cmip5(): results, atypical_content, typical_content = x.run() assert atypical_content[0]["coordinates.*.id"] == ["latitude", "longitude", "time"] assert typical_content["coordinates.*.id"] == [ - "height", "latitude", + "level", "longitude", "time", ] diff --git a/tests/test_fixes/esmval_test_fixes/cl.json b/tests/test_fixes/esmval_test_fixes/cl.json new file mode 100644 index 00000000..9391b8ad --- /dev/null +++ 
b/tests/test_fixes/esmval_test_fixes/cl.json @@ -0,0 +1,43 @@ +{ + "dataset_id": "CMIP6.CMIP.NCAR.CESM2.amip.r3i1p1f1.Amon.cl.gn.v20190319", + "fixes": [{ + "fix_id": "AttrFix", + "operands": { + "var_id": "lev", + "attrs": + {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate", + "formula_terms": "p0: p0 a: a b: b ps: ps"} + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }, + { + "fix_id": "ReverseCoordFix", + "operands": { + "coords": [ + "lev" + ]}, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }, + { + "fix_id": "Reverse2DVarFix", + "operands": { + "var_ids": [ + "a_bnds", + "b_bnds" + ] + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }] +} diff --git a/tests/test_fixes/esmval_test_fixes/cl_fix_ds_list.txt b/tests/test_fixes/esmval_test_fixes/cl_fix_ds_list.txt new file mode 100644 index 00000000..7d722c7d --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/cl_fix_ds_list.txt @@ -0,0 +1,13 @@ +CMIP6.CMIP.NCAR.CESM2.amip.r1i1p1f1.Amon.cl.gn.v20190319 +CMIP6.CMIP.NCAR.CESM2.amip.r2i1p1f1.Amon.cl.gn.v20190319 +CMIP6.CMIP.NCAR.CESM2.amip.r3i1p1f1.Amon.cl.gn.v20190319 +CMIP6.CMIP.NCAR.CESM2.historical.r10i1p1f1.Amon.cl.gn.v20190313 +CMIP6.CMIP.NCAR.CESM2.historical.r1i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r2i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r3i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r4i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r5i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r6i1p1f1.Amon.cl.gn.v20190308 +CMIP6.CMIP.NCAR.CESM2.historical.r7i1p1f1.Amon.cl.gn.v20190311 +CMIP6.CMIP.NCAR.CESM2.historical.r8i1p1f1.Amon.cl.gn.v20190311 +CMIP6.CMIP.NCAR.CESM2.historical.r9i1p1f1.Amon.cl.gn.v20190311 diff --git a/tests/test_fixes/esmval_test_fixes/cl_template.json b/tests/test_fixes/esmval_test_fixes/cl_template.json new file mode 100644 index 00000000..aa5992c1 --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/cl_template.json @@ -0,0 +1,43 @@ +{ + "dataset_id": "", + "fixes": [{ + "fix_id": "AttrFix", + "operands": { + "var_id": "lev", + "attrs": + {"standard_name": "atmosphere_hybrid_sigma_pressure_coordinate", + "formula_terms": "p0: p0 a: a b: b ps: ps"} + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }, + { + "fix_id": "ReverseCoordFix", + "operands": { + "coords": [ + "lev" + ]}, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }, + { + "fix_id": "Reverse2DVarFix", + "operands": { + "var_ids": [ + "a_bnds", + "b_bnds" + ] + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/538"} + }] +} diff --git a/tests/test_fixes/esmval_test_fixes/gpp.json b/tests/test_fixes/esmval_test_fixes/gpp.json new file mode 100644 index 00000000..6dce083c --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/gpp.json @@ -0,0 +1,14 @@ +{ + "dataset_id": "cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp45.mon.land.Lmon.r1i1p1.latest.gpp", + "fixes": [{ + "fix_id": "MaskDataFix", + "operands": { + "value": "1.0e33" + }, + 
"source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/352"} + }] +} diff --git a/tests/test_fixes/esmval_test_fixes/gpp_fix_ds_list.txt b/tests/test_fixes/esmval_test_fixes/gpp_fix_ds_list.txt new file mode 100644 index 00000000..f4728b79 --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/gpp_fix_ds_list.txt @@ -0,0 +1,38 @@ +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.1pctCO2.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.1pctCO2.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.1pctCO2.mon.land.Lmon.r1i1p2.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.1pctCO2.mon.land.Lmon.r1i1p2.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmControl.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmControl.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmControl.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFdbk1.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFdbk1.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFdbk2.mon.land.Lmon.r1i1p2.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFdbk2.mon.land.Lmon.r1i1p2.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p2.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p2.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p3.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim1.mon.land.Lmon.r1i1p3.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim2.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim2.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim2.mon.land.Lmon.r1i1p2.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmFixClim2.mon.land.Lmon.r1i1p2.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmHistorical.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmHistorical.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmrcp85.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmrcp85.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.esmrcp85.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.historical.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.historical.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.historical.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.piControl.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.piControl.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.piControl.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp45.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp45.mon.land.Lmon.r1i1p1.v20130212.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp45.mon.land.Lmon.r1i1p1.v20140402.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp85.mon.land.Lmon.r1i1p1.v20121029.gpp +cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp85.mon.land.Lmon.r1i1p1.v20130212.gpp 
+cmip5.output1.NSF-DOE-NCAR.CESM1-BGC.rcp85.mon.land.Lmon.r1i1p1.v20140402.gpp diff --git a/tests/test_fixes/esmval_test_fixes/gpp_template.json b/tests/test_fixes/esmval_test_fixes/gpp_template.json new file mode 100644 index 00000000..ae30d5a5 --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/gpp_template.json @@ -0,0 +1,14 @@ +{ + "dataset_id": "", + "fixes": [{ + "fix_id": "MaskDataFix", + "operands": { + "value": "1.0e33" + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/issues/352"} + }] +} diff --git a/tests/test_fixes/esmval_test_fixes/o2.json b/tests/test_fixes/esmval_test_fixes/o2.json new file mode 100644 index 00000000..083ea667 --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/o2.json @@ -0,0 +1,17 @@ +{ + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [{ + "fix_id": "MainVarAttrFix", + "operands": { + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55"} + }] +} diff --git a/tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt b/tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt new file mode 100644 index 00000000..58afb32b --- /dev/null +++ b/tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt @@ -0,0 +1,23 @@ +cmip5.output1.MOHC.HadGEM2-CC.historical.mon.ocnBgchem.Omon.r1i1p1.v20110930.o2 +cmip5.output1.MOHC.HadGEM2-CC.historical.mon.ocnBgchem.Omon.r2i1p1.v20111129.o2 +cmip5.output1.MOHC.HadGEM2-CC.historical.mon.ocnBgchem.Omon.r3i1p1.v20111216.o2 +cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.v20110930.o2 +cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r2i1p1.v20111129.o2 +cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r3i1p1.v20111216.o2 +cmip5.output1.MOHC.HadGEM2-CC.midHolocene.mon.ocnBgchem.Omon.r1i1p1.v20120223.o2 +cmip5.output1.MOHC.HadGEM2-CC.midHolocene.yr.ocnBgchem.Oyr.r1i1p1.v20120223.o2 +cmip5.output1.MOHC.HadGEM2-CC.piControl.mon.ocnBgchem.Omon.r1i1p1.v20111109.o2 +cmip5.output1.MOHC.HadGEM2-CC.piControl.yr.ocnBgchem.Oyr.r1i1p1.v20111109.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp45.mon.ocnBgchem.Omon.r1i1p1.v20111102.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp45.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp45.yr.ocnBgchem.Oyr.r1i1p1.v20111102.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp45.yr.ocnBgchem.Oyr.r1i1p1.v20120531.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20111012.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r2i1p1.v20111220.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r2i1p1.v20111221.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r3i1p1.v20120106.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.yr.ocnBgchem.Oyr.r1i1p1.v20111011.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.yr.ocnBgchem.Oyr.r1i1p1.v20120531.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.yr.ocnBgchem.Oyr.r2i1p1.v20111220.o2 +cmip5.output1.MOHC.HadGEM2-CC.rcp85.yr.ocnBgchem.Oyr.r3i1p1.v20120106.o2 diff --git a/tests/test_fixes/esmval_test_fixes/o2_template.json b/tests/test_fixes/esmval_test_fixes/o2_template.json new file mode 100644 index 00000000..962f371a --- /dev/null +++ 
b/tests/test_fixes/esmval_test_fixes/o2_template.json @@ -0,0 +1,16 @@ +{ + "dataset_id": "", + "fixes": [{ + "fix_id": "MainVarAttrFix", + "operands": { + "attrs": + {"long_name": "Dissolved Oxygen Concentration", + "standard_name": "mole_concentration_of_dissolved_molecular_oxygen_in_sea_water"} + }, + "source": { + "name": "esmvaltool", + "version": "2.0.0", + "comments": "", + "url":"https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55"} + }] +} diff --git a/tests/test_fixes/test_coord_fixes.py b/tests/test_fixes/test_coord_fixes.py index f1e9df7a..290027bd 100644 --- a/tests/test_fixes/test_coord_fixes.py +++ b/tests/test_fixes/test_coord_fixes.py @@ -1,8 +1,21 @@ -from dachar.fixes.coord_fixes import SqueezeDimensionsFix, AddScalarCoordFix +from dachar.fixes.coord_fixes import AddScalarCoordFix +from dachar.fixes.coord_fixes import SqueezeDimensionsFix + + +source = { + "name": "dachar", + "version": "test", + "comment": "No specific source provided - link to all fixes in dachar", + "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes", +} def test_SqueezeDimensionsFix(): - fix = SqueezeDimensionsFix("cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", dims="lev") + fix = SqueezeDimensionsFix( + "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", + dims="lev", + source=source, + ) assert fix.fix_id == "SqueezeDimensionsFix" assert fix.title == "Squeeze singleton dimensions of the main variable" @@ -12,7 +25,9 @@ def test_SqueezeDimensionsFix(): assert fix.ref_implementation == "daops.data_utils.coord_utils.squeeze_dims" assert fix.process_type == "post_processor" - assert fix.description == """ + assert ( + fix.description + == """ Takes a sequence of identifiers that specify the dimensions to be squeezed. For each dimension: @@ -32,6 +47,7 @@ def test_SqueezeDimensionsFix(): - shape = [1800] - rank = 1 """ + ) def test_AddScalarCoordFix(): @@ -50,17 +66,30 @@ def test_AddScalarCoordFix(): }, } - fix = AddScalarCoordFix("cmip5.output1.ICHEC.EC-EARTH.historical.mon.atmos.Amon.r1i1p1.latest.tas", **operands) + fix = AddScalarCoordFix( + "cmip5.output1.ICHEC.EC-EARTH.historical.mon.atmos.Amon.r1i1p1.latest.tas", + **operands, + source=source, + ) assert fix.fix_id == "AddScalarCoordFix" assert fix.title == "Add a coordinate" assert fix.category == "coord_fixes" - assert fix.required_operands == ["dtype", "id", "value", "length", "attrs", "coord_type"] + assert fix.required_operands == [ + "dtype", + "id", + "value", + "length", + "attrs", + "coord_type", + ] assert fix.ref_implementation == "daops.data_utils.coord_utils.add_scalar_coord" assert fix.process_type == "post_processor" - assert fix.description == """ + assert ( + fix.description + == """ Takes the coordinate to add along with its attributes For example: @@ -73,4 +102,5 @@ def test_AddScalarCoordFix(): Fix example: ds = ds.assign_coords(height=2.0) will add a scalar height coordinate with a value of 2.0 Attributes will be set by attrs: e.g. 
ds.attrs['units'] = 'm' - """ \ No newline at end of file + """ + ) diff --git a/tests/test_fixes/test_fix_class.py b/tests/test_fixes/test_fix_class.py index f8a29256..d2f26fbb 100644 --- a/tests/test_fixes/test_fix_class.py +++ b/tests/test_fixes/test_fix_class.py @@ -1,15 +1,22 @@ import importlib import pprint as pp -from dachar.fixes.fix_api import get_fix_categories, get_fix_dict, get_fix from dachar.fixes._base_fix import _BaseDatasetFix +from dachar.fixes.fix_api import get_fix +from dachar.fixes.fix_api import get_fix_categories +from dachar.fixes.fix_api import get_fix_dict def test_get_fix_categories(): - expected_fix_categories = ["coord_fixes"] + expected_fix_categories = ["array_fixes", "attr_fixes", "coord_fixes", "var_fixes"] assert get_fix_categories() == expected_fix_categories - expected_fix_dict = {"coord_fixes": ["SqueezeDimensionsFix", "AddScalarCoordFix"]} + expected_fix_dict = { + "coord_fixes": ["SqueezeDimensionsFix", "AddScalarCoordFix", "ReverseCoordFix"], + "array_fixes": ["MaskDataFix"], + "attr_fixes": ["MainVarAttrFix", "AttrFix"], + "var_fixes": ["Reverse2DVarFix"], + } assert get_fix_dict() == expected_fix_dict @@ -75,8 +82,16 @@ class _TestFix(_BaseDatasetFix): # } +source = { + "name": "dachar", + "version": "test", + "comment": "No specific source provided - link to all fixes in dachar", + "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes", +} + + def test_eg_fix(): - fix = _TestFix("ds1", thing=23, other="hello") + fix = _TestFix("ds1", thing=23, other="hello", source=source) assert fix.description == _TestFix.description expected_dict = { @@ -89,7 +104,12 @@ def test_eg_fix(): "reference_implementation": _TestFix.ref_implementation, "process_type": _TestFix.process_type, "operands": {"thing": 23, "other": "hello"}, - "source": "dachar version 0.1.0", + "source": { + "name": "dachar", + "version": "test", + "comment": "No specific source provided - link to all fixes in dachar", + "url": "https://github.com/roocs/dachar/tree/master/dachar/fixes", + }, }, } diff --git a/tests/test_fixes/test_fix_store.py b/tests/test_fixes/test_fix_store.py index 6ca041a6..d986868f 100644 --- a/tests/test_fixes/test_fix_store.py +++ b/tests/test_fixes/test_fix_store.py @@ -1,9 +1,10 @@ import os import shutil -# Create a new dummy store to run tests on from tests._stores_for_tests import _TestFixStore +# Create a new dummy store to run tests on + # recs = [ # {'fix_id': 'Fix1', 'operands': {'arg1': '1'}, 'ncml': ''}, # {'fix_id': 'Fix2', 'operands': {'arg2': '2'}, 'ncml': ''} @@ -72,4 +73,5 @@ def test_withdraw_fix_1(): def teardown_module(): + # pass _clear_store() diff --git a/tests/test_fixes/test_process_fixes.py b/tests/test_fixes/test_process_fixes.py index 32d3adde..3ee80120 100644 --- a/tests/test_fixes/test_process_fixes.py +++ b/tests/test_fixes/test_process_fixes.py @@ -1,15 +1,15 @@ import os import shutil import subprocess +from unittest.mock import Mock + import mock import pytest -from tests._stores_for_tests import _TestFixProposalStore, _TestFixStore from dachar.fixes import fix_processor from dachar.utils.common import now_string - - -from unittest.mock import Mock +from tests._stores_for_tests import _TestFixProposalStore +from tests._stores_for_tests import _TestFixStore ds_ids = [ "ds.1.1.1.1.1.1", @@ -109,44 +109,40 @@ def test_get_2_proposed_fixes(): assert len(proposed_fixes) == 2 - assert (proposed_fixes[0]) == { + assert (proposed_fixes[1]) == { "dataset_id": "ds.1.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": 
"test_fixes", - "description": "Applies fix 1", - "fix_id": "Fix1", - "operands": {"arg1": "1"}, - "reference_implementation": "daops.test.test_fix1", - "title": "Apply Fix 1", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 1", + "fix_id": "Fix1", + "operands": {"arg1": "1"}, + "reference_implementation": "daops.test.test_fix1", + "title": "Apply Fix 1", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } - assert proposed_fixes[1] == { + assert proposed_fixes[0] == { "dataset_id": "ds.2.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": "test_fixes", - "description": "Applies fix 2", - "fix_id": "Fix2", - "operands": {"arg2": "2"}, - "reference_implementation": "daops.test.test_fix2", - "title": "Apply Fix 2", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 2", + "fix_id": "Fix2", + "operands": {"arg2": "2"}, + "reference_implementation": "daops.test.test_fix2", + "title": "Apply Fix 2", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } @@ -165,22 +161,20 @@ def test_get_1_proposed_fixes(): assert len(proposed_fixes) == 1 assert proposed_fixes[0] == { "dataset_id": "ds.1.1.1.1.1.1", - "fixes": [ - { - "fix": { - "category": "test_fixes", - "description": "Applies fix 1", - "fix_id": "Fix1", - "operands": {"arg1": "1"}, - "reference_implementation": "daops.test.test_fix1", - "title": "Apply Fix 1", - }, - "history": [], - "reason": "", - "status": "proposed", - "timestamp": now_string(), - } - ], + "this_fix": { + "fix": { + "category": "test_fixes", + "description": "Applies fix 1", + "fix_id": "Fix1", + "operands": {"arg1": "1"}, + "reference_implementation": "daops.test.test_fix1", + "title": "Apply Fix 1", + }, + "history": [], + "reason": "", + "status": "proposed", + "timestamp": now_string(), + }, } @@ -241,7 +235,7 @@ def test_withdraw_fix_not_found(): fix_processor.get_fix_store = Mock(return_value=f_store) with pytest.raises(Exception) as exc: fix_processor.process_all_fixes("withdraw", [ds_ids[1]]) - assert exc.value == "A fix could not be found." + assert exc.value.args[0] == "A fix could not be found." 
def teardown_module(): diff --git a/tests/test_fixes/test_propose_fixes.py b/tests/test_fixes/test_propose_fixes.py new file mode 100644 index 00000000..bb5a1f74 --- /dev/null +++ b/tests/test_fixes/test_propose_fixes.py @@ -0,0 +1,207 @@ +import os +import shutil +import subprocess +from unittest.mock import Mock + +import mock +import pytest + +from dachar.fixes import generate_proposals +from dachar.utils.common import now_string +from tests._stores_for_tests import _TestFixProposalStore + +prop_store = None +cwd = os.getcwd() + + +def clear_store(): + fp_dr = _TestFixProposalStore.config["local.base_dir"] + if os.path.isdir(fp_dr): + shutil.rmtree(fp_dr) + + +def setup_module(): + clear_store() + global prop_store + prop_store = _TestFixProposalStore() + + +def test_generate_proposal_json(): + file = [f"{cwd}/tests/test_fixes/esmval_test_fixes/o2.json"] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + + +def test_generate_proposal_json_2_fixes(): + + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2", + "fixes": [ + { + "fix_id": "MainVarAttrFix", + "operands": { + "attrs": [ + "long_name,Dissolved Oxygen Concentration", + "standard_name,mole_concentration_of_dissolved_molecular_oxygen_in_sea_water", + ] + }, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + }, + { + "fix_id": "SqueezeDimensionsFix", + "operands": {"dims": ["test"]}, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + }, + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + assert record[1]["this_fix"]["fix"]["fix_id"] == "SqueezeDimensionsFix" + + +def test_generate_proposal_template(): + ds_list = f"{cwd}/tests/test_fixes/esmval_test_fixes/o2_fix_ds_list.txt" + template = f"{cwd}/tests/test_fixes/esmval_test_fixes/o2_template.json" + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_proposal_from_template(template, ds_list) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + + +def test_generate_proposal_when_one_already_exists(): + file = [ + { + "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2", + "fixes": [ + { + "fix_id": "SqueezeDimensionsFix", + "operands": {"dims": ["test"]}, + "source": { + "name": "", + "version": "", + "comments": "testing 2 fixes proposed externally", + "url": "", + }, + } + ], + } + ] + + generate_proposals.get_fix_prop_store = Mock(return_value=prop_store) + + generate_proposals.generate_fix_proposals(file) + record = prop_store.get_proposed_fix_by_id( + "cmip5.output1.MOHC.HadGEM2-CC.rcp85.mon.ocnBgchem.Omon.r1i1p1.v20120531.o2" + ) + assert record[0]["this_fix"]["fix"]["fix_id"] == "MainVarAttrFix" + assert 
+
+
+def test_unexpected_operands():
+    file = [
+        {
+            "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2",
+            "fixes": [
+                {
+                    "fix_id": "MainVarAttrFix",
+                    "operands": {"test": ["not_real"]},
+                    "source": {
+                        "name": "esmvaltool",
+                        "version": "2.0.0",
+                        "comments": "",
+                        "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55",
+                    },
+                }
+            ],
+        }
+    ]
+
+    generate_proposals.get_fix_prop_store = Mock(return_value=prop_store)
+
+    with pytest.raises(KeyError) as exc:
+        generate_proposals.generate_fix_proposals(file)
+    assert exc.value.args[0] == "Required keyword argument(s) not provided: {'attrs'}"
+
+
+def test_invalid_fields():
+    file = [
+        {
+            "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2",
+            "fixes": [
+                {
+                    "fox_id": "MainVarAttrFix",
+                    "operands": {"attrs": ["not_real"]},
+                    "source": {
+                        "name": "esmvaltool",
+                        "version": "2.0.0",
+                        "comments": "",
+                        "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55",
+                    },
+                }
+            ],
+        }
+    ]
+
+    generate_proposals.get_fix_prop_store = Mock(return_value=prop_store)
+
+    with pytest.raises(KeyError) as exc:
+        generate_proposals.generate_fix_proposals(file)
+    assert exc.value.args[0] == "Required fields not provided: {'fix_id'}"
+
+
+def test_missing_fields():
+    file = [
+        {
+            "dataset_id": "cmip5.output1.MOHC.HadGEM2-CC.historical.yr.ocnBgchem.Oyr.r1i1p1.latest.o2",
+            "fixes": [
+                {
+                    "fix_id": "MainVarAttrFix",
+                    "source": {
+                        "name": "esmvaltool",
+                        "version": "2.0.0",
+                        "comments": "",
+                        "url": "https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/cmor/_fixes/cmip5/hadgem2_cc.py#L34-L55",
+                    },
+                }
+            ],
+        }
+    ]
+
+    generate_proposals.get_fix_prop_store = Mock(return_value=prop_store)
+
+    with pytest.raises(KeyError) as exc:
+        generate_proposals.generate_fix_proposals(file)
+    assert exc.value.args[0] == "Required fields not provided: {'operands'}"
+
+
+def teardown_module():
+    # pass
+    clear_store()
diff --git a/tests/test_scan/test_scan.py b/tests/test_scan/test_scan.py
index 1894dc0c..8e60ae25 100644
--- a/tests/test_scan/test_scan.py
+++ b/tests/test_scan/test_scan.py
@@ -142,7 +142,7 @@ def test_varying_coords_example_fail(create_netcdf_file, create_netcdf_file_2):
 
 
 # seems to keep one variable but joins the coordinate lists together
-@pytest.mark.skip(reason="Can't test for this shape when using test data")
+@pytest.mark.xfail(reason="Can't test for this shape when using test data")
 def test_varying_coords_example_succeed():
     """ Tests what happens when opening files as mfdataset for which the coordinates vary """
     ds = xr.open_mfdataset(
@@ -155,7 +155,7 @@
     )
 
 
-@pytest.mark.skip(
+@pytest.mark.xfail(
     reason="Exception was: Cannot compare type 'Timestamp' with type 'DatetimeProlepticGregorian'"
 )
 def test_time_axis_types_issue():
diff --git a/tests/test_scan/test_scan_datasets.py b/tests/test_scan/test_scan_datasets.py
index d915c052..dab0ae91 100644
--- a/tests/test_scan/test_scan_datasets.py
+++ b/tests/test_scan/test_scan_datasets.py
@@ -1,12 +1,14 @@
 import json
-import pytest
 import os
+
+import pytest
 import xarray as xr
 
+from .test_check_files import make_nc_modify_var_attr
+from dachar import CONFIG
+from dachar import logging
 from dachar.scan.scan import scan_datasets
 from dachar.utils import switch_ds
-from dachar import CONFIG, logging
-from .test_check_files import make_nc_modify_var_attr
 
 
 LOGGER = logging.getLogger(__file__)
@@ -20,23 +22,6 @@ class TestCorruptJson:
-    @pytest.mark.skip("This ds id no longer creates a corrupt JSON file")
-    def test_corrupt_json_file(self):
-        """ Tests what happens when a JSON file exists but is incomplete due to an issue encoding."""
-        ds_id = [
-            "c3s-cordex.output.EUR-11.IPSL.MOHC-HadGEM2-ES.rcp85.r1i1p1.IPSL-WRF381P.v1.day.psl.v20190212"
-        ]
-        try:
-            scan_datasets(
-                project="c3s-cordex",
-                ds_ids=ds_id,
-                paths=CONFIG['project:c3s-cordex']['base_dir'],
-                mode="quick",
-                location="ceda",
-            )
-        except json.decoder.JSONDecodeError as exc:
-            pass
-
     def test_fake_corrupt_json_file(self, tmpdir):
         """ Creates a bad JSON file and tests the code responds properly"""
         try:
@@ -70,7 +55,7 @@ def test_file_checker(self):
         ds_id = ["cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas"]
         grouped_ds_id = switch_ds.get_grouped_ds_id(ds_id[0])
 
-        CONFIG['project:cmip5']['base_dir'] = "tests/test_outputs/"
+        CONFIG["project:cmip5"]["base_dir"] = "tests/test_outputs/"
 
         failure_file = f"outputs/logs/failure/pre_extract_error/{grouped_ds_id}.log"
         json_file = f"outputs/logs/register/{grouped_ds_id}.json"
@@ -83,7 +68,7 @@ def test_file_checker(self):
         scan_datasets(
             project="cmip5",
             ds_ids=ds_id,
-            paths=CONFIG['project:cmip5']['base_dir'],
+            paths=CONFIG["project:cmip5"]["base_dir"],
             mode="quick",
             location="ceda",
         )
diff --git a/tests/test_utils/test_base_dirs.py b/tests/test_utils/test_base_dirs.py
index bdaf7cdb..dbe60c6c 100644
--- a/tests/test_utils/test_base_dirs.py
+++ b/tests/test_utils/test_base_dirs.py
@@ -1,61 +1,121 @@
+import configparser
 import os
+import shutil
 import subprocess
+import tempfile
+import warnings
+from unittest.mock import Mock
 
 import pytest
 
-from dachar.scan.scan import scan_datasets
 from dachar import CONFIG
+from dachar.scan import scan
+from tests._stores_for_tests import _TestDatasetCharacterStore
 
 
-@pytest.mark.skip("Fails - not possible locally")
+# Must run with --noconftest flag
+
+char_store = None
+cwd = os.getcwd()
+
+
+def clear_store():
+    dc_dr = _TestDatasetCharacterStore.config["local.base_dir"]
+    if os.path.isdir(dc_dr):
+        shutil.rmtree(dc_dr)
+
+
+def setup_module():
+    clear_store()
+    global char_store
+    char_store = _TestDatasetCharacterStore()
+
+
+@pytest.mark.xfail(
+    reason="conftest overwrites base dir to test base dir. Will pass if run with --noconftest flag"
+)
+@pytest.mark.skipif(
+    os.path.isdir("/group_workspaces") is False, reason="data not available"
+)
 def test_c3s_cmip5_base_dir():
     """ Checks definition of c3s cmip5 base dir resolves to a real directory"""
+    scan.get_dc_store = Mock(return_value=char_store)
+
     c3s_cmip5_id = [
         "c3s-cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.tas.latest"
     ]
-    result = scan_datasets(
+    scan.scan_datasets(
         project="c3s-cmip5",
         ds_ids=c3s_cmip5_id,
-        paths=CONFIG['project:c3s-cmip5']['base_dir'],
+        paths=CONFIG["project:c3s-cmip5"]["base_dir"],
         mode="quick",
         location="ceda",
     )
     assert os.path.exists(
-        "./outputs/register/c3s-cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon.r1i1p1.tas.latest.json"
+        os.path.join(
+            char_store.config.get("local.base_dir"),
+            "c3s-cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon.r1i1p1.tas.latest.json",
+        )
     )
 
 
-@pytest.mark.skip("FAILS - c3s-cmip6 base dir not defined yet")
+@pytest.mark.xfail(
+    reason="conftest overwrites base dir to test base dir. Will pass if run with --noconftest flag"
+)
+@pytest.mark.skipif(os.path.isdir("/badc") is False, reason="data not available")
 def test_c3s_cmip6_base_dir():
     """ Checks definition of c3s cmip6 base dir resolves to a real directory"""
+    scan.get_dc_store = Mock(return_value=char_store)
+
     c3s_cmip6_id = [
         "c3s-cmip6.CMIP.MOHC.HadGEM3-GC31-LL.amip.r1i1p1f3.Emon.rls.gn.latest"
     ]
-    result = scan_datasets(
+    scan.scan_datasets(
         project="c3s-cmip6",
         ds_ids=c3s_cmip6_id,
-        paths=CONFIG['project:c3s-cmip6']['base_dir'],
+        paths=CONFIG["project:c3s-cmip6"]["base_dir"],
         mode="quick",
         location="ceda",
     )
+
+    # base dir not defined yet
     assert os.path.exists(
-        "./outputs/register/c3s-cmip6/CMIP/MOHC/HadGEM3-GC31-LL/amip/r1i1p1f3/Emon.rls.gn.latest.json"
+        os.path.join(
+            char_store.config.get("local.base_dir"),
+            "c3s-cmip6/CMIP/MOHC/HadGEM3-GC31-LL/amip/r1i1p1f3/Emon.rls.gn.latest.json",
+        )
     )
 
 
-@pytest.mark.skip("Fails - not possible locally")
+@pytest.mark.xfail(
+    reason="conftest overwrites base dir to test base dir. Will pass if run with --noconftest flag"
+)
+@pytest.mark.skipif(
+    os.path.isdir("/group_workspaces") is False, reason="data not available"
+)
 def test_c3s_cordex_base_dir():
     """ Checks definition of c3s cordex base dir resolves to a real directory"""
+    scan.get_dc_store = Mock(return_value=char_store)
+
     c3s_cordex_id = [
         "c3s-cordex.output.EUR-11.CNRM.CNRM-CERFACS-CNRM-CM5.rcp45.r1i1p1.CNRM-ALADIN53.v1.day.tas.v20150127"
     ]
-    result = scan_datasets(
+    scan.scan_datasets(
         project="c3s-cordex",
         ds_ids=c3s_cordex_id,
-        paths=CONFIG['project:c3s-cordex']['base_dir'],
+        paths=CONFIG["project:c3s-cordex"]["base_dir"],
         mode="quick",
         location="ceda",
     )
     assert os.path.exists(
-        "./outputs/register/c3s-cordex/output/EUR-11/CNRM/CNRM-CERFACS-CNRM-CM5/rcp45/r1i1p1/CNRM-ALADIN53/v1.day.tas.v20150127.json"
+        os.path.join(
+            char_store.config.get("local.base_dir"),
+            "c3s-cordex/output/EUR-11/CNRM/CNRM-CERFACS-CNRM-CM5/rcp45/r1i1p1"
+            "/CNRM-ALADIN53/v1.day.tas.v20150127.json",
+        )
     )
+
+
+def teardown_module():
+    # pass
+    clear_store()