From 26d376525cf8d8ae125d99ee1c250636cd7ae361 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 16 Aug 2017 01:26:25 +1000 Subject: [PATCH 01/12] adding test case for ereefs --- lib/bald/tests/integration/test_cdl_rdfgraph.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/bald/tests/integration/test_cdl_rdfgraph.py b/lib/bald/tests/integration/test_cdl_rdfgraph.py index ced6c8a..2601881 100644 --- a/lib/bald/tests/integration/test_cdl_rdfgraph.py +++ b/lib/bald/tests/integration/test_cdl_rdfgraph.py @@ -36,3 +36,15 @@ def test_multi_array_reference(self): with open(os.path.join(self.ttl_path, 'multi_array_reference.ttl'), 'r') as sf: expected_ttl = sf.read() self.assertEqual(expected_ttl, ttl) + + def test_ereefs(self): + with self.temp_filename('.nc') as tfile: + cdl_file = os.path.join(self.cdl_path, 'ereefs_gbr4_ncld.cdl') + subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) + root_container = bald.load_netcdf(tfile) + testPassed + try: + ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") + except TypeError: + self.fail("Test case could not convert ereefs CDL to RDF") + From 4cf1112745416d1dd365e3226669bbb7aef11664 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 30 Aug 2017 18:08:06 +1000 Subject: [PATCH 02/12] Refactoring alias and prefix handling to variable --- lib/bald/__init__.py | 42 +++++++++++--- .../tests/integration/CDL/array_alias_v2.cdl | 58 +++++++++++++++++++ .../tests/integration/CDL/array_prefix_v2.cdl | 55 ++++++++++++++++++ .../integration/CDL/array_prefix_v2_full.cdl | 57 ++++++++++++++++++ lib/bald/tests/integration/test_cdl_v2.py | 40 +++++++++++++ lib/bald/validation.py | 2 + 6 files changed, 246 insertions(+), 8 deletions(-) create mode 100644 lib/bald/tests/integration/CDL/array_alias_v2.cdl create mode 100644 lib/bald/tests/integration/CDL/array_prefix_v2.cdl create mode 100644 lib/bald/tests/integration/CDL/array_prefix_v2_full.cdl create mode 100644 lib/bald/tests/integration/test_cdl_v2.py diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 1503261..0d5c301 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -240,6 +240,7 @@ def __getitem__(self, item): def check_uri(self, uri): result = False + #print("Checking uri: " + uri) if self[uri].status_code == 200: result = True return result @@ -438,6 +439,11 @@ def rdfgraph(self): """ graph = rdflib.Graph() graph.bind('bald', 'http://binary-array-ld.net/latest/') + for prefix_name in self._prefixes: + #strip the double underscore suffix + new_name = prefix_name[:-2] + + graph.bind(new_name, self._prefixes[prefix_name]) graph = self.rdfnode(graph) return graph @@ -537,22 +543,34 @@ def load_netcdf(afilepath, uri=None): """ with load(afilepath) as fhandle: - prefix_group = (fhandle[fhandle.bald__isPrefixedBy] if + prefix_var_name = None + if hasattr(fhandle, 'bald__isPrefixedBy'): + prefix_var_name = fhandle.bald__isPrefixedBy + + prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if hasattr(fhandle, 'bald__isPrefixedBy') else {}) prefixes = {} - if prefix_group: - prefixes = (dict([(prefix, getattr(prefix_group, prefix)) for - prefix in prefix_group.ncattrs()])) + if prefix_var is not {} : + prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for + prefix in prefix_var.ncattrs()])) else: for k in fhandle.ncattrs(): if k.endswith('__'): prefixes[k] = getattr(fhandle, k) - alias_group = (fhandle[fhandle.bald__isAliasedBy] + + #print(prefixes) + + alias_var_name = None + if hasattr(fhandle, 'bald__isAliasedBy'): + alias_var_name = fhandle.bald__isAliasedBy + + alias_var = (fhandle[fhandle.bald__isAliasedBy] if hasattr(fhandle, 'bald__isAliasedBy') else {}) aliases = {} - if alias_group: - aliases = (dict([(alias, getattr(alias_group, alias)) - for alias in alias_group.ncattrs()])) + if alias_var != {}: + aliases = (dict([(alias, getattr(alias_var, alias)) + for alias in alias_var.ncattrs()])) + #print(aliases) attrs = {} for k in fhandle.ncattrs(): @@ -567,6 +585,10 @@ def load_netcdf(afilepath, uri=None): root_container.attrs['bald__contains'] = [] file_variables = {} for name in fhandle.variables: + #print(name) + if name == prefix_var_name or name == alias_var_name: + #print("Skipping " + name) + continue sattrs = fhandle.variables[name].__dict__.copy() # inconsistent use of '/'; fix it @@ -590,6 +612,10 @@ def load_netcdf(afilepath, uri=None): # cycle again and find references for name in fhandle.variables: + if name == prefix_var_name or name == alias_var_name: + #print("Skipping " + name) + continue + var = file_variables[name] # reverse lookup based on type to be added lookups = ['bald__references', 'bald__array'] diff --git a/lib/bald/tests/integration/CDL/array_alias_v2.cdl b/lib/bald/tests/integration/CDL/array_alias_v2.cdl new file mode 100644 index 0000000..4b6c2db --- /dev/null +++ b/lib/bald/tests/integration/CDL/array_alias_v2.cdl @@ -0,0 +1,58 @@ +netcdf tmpMwXy8U { +dimensions: + pdim0 = 11 ; + pdim1 = 17 ; +variables: + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + + int alias_list ; + alias_list:SDN_ParameterDiscoveryCode = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#SDN_ParameterDiscoveryCode" ; + alias_list:BactTaxaAbundSed = "http://vocab.nerc.ac.uk/collection/P02/current/BAUC/" ; + alias_list:standard_name = "https://def.scitools.org.uk/CFTerms/standard_name" ; + alias_list:air_temperature = "http://vocab.nerc.ac.uk/collection/P07/current/CFSN0023/" ; + + int parent_variable(pdim0, pdim1) ; + parent_variable:rdf__type = "bald__Array" ; + parent_variable:SDN_ParameterDiscoveryCode = "BactTaxaAbundSed" ; + parent_variable:submursible_name = "Nautilus" ; + + int temp(pdim0, pdim1) ; + temp:standard_name = "air_temperature" ; + +// global attributes: + :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; + :rdf__type = "bald__Container" ; + :bald__isPrefixedBy = "prefix_list" ; + :bald__isAliasedBy = "alias_list" ; +data: + + parent_variable = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + temp = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + +} diff --git a/lib/bald/tests/integration/CDL/array_prefix_v2.cdl b/lib/bald/tests/integration/CDL/array_prefix_v2.cdl new file mode 100644 index 0000000..609c304 --- /dev/null +++ b/lib/bald/tests/integration/CDL/array_prefix_v2.cdl @@ -0,0 +1,55 @@ +netcdf tmpMwXy8U { +dimensions: + pdim0 = 11 ; + pdim1 = 17 ; +variables: + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + prefix_list:sdn__ = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#" ; + prefix_list:sdn-vocab__= "http://vocab.nerc.ac.uk/collection/P02/current/" ; + prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ; + prefix_list:cfsn__ = "http://vocab.nerc.ac.uk/collection/P07/current/CFSN0023/" ; + + int parent_variable(pdim0, pdim1) ; + parent_variable:rdf__type = "bald__Array" ; + parent_variable:sdn__SDN_ParameterDiscoveryCode = "sdn-vocab__BAUC" ; + parent_variable:submursible_name = "Nautilus" ; + + int temp(pdim0, pdim1) ; + temp:standard_name = "air_temperature" ; + +// global attributes: + :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; + :rdf__type = "bald__Container" ; + :bald__isPrefixedBy = "prefix_list" ; +data: + + parent_variable = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + temp = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + +} diff --git a/lib/bald/tests/integration/CDL/array_prefix_v2_full.cdl b/lib/bald/tests/integration/CDL/array_prefix_v2_full.cdl new file mode 100644 index 0000000..9272f74 --- /dev/null +++ b/lib/bald/tests/integration/CDL/array_prefix_v2_full.cdl @@ -0,0 +1,57 @@ +netcdf tmpMwXy8U { +dimensions: + pdim0 = 11 ; + pdim1 = 17 ; +variables: + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + prefix_list:sdn__ = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#" ; + prefix_list:sdn-vocab__= "http://vocab.nerc.ac.uk/collection/P02/current/" ; + prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ; + prefix_list:cfsn-mmi__ = "http://mmisw.org/ont/cf/parameter/" ; + prefix_list:cfsn-nerc__ = "http://vocab.nerc.ac.uk/collection/P07/current/"; + + int parent_variable(pdim0, pdim1) ; + parent_variable:rdf__type = "bald__Array" ; + parent_variable:sdn__SDN_ParameterDiscoveryCode = "sdn-vocab__BAUC" ; + parent_variable:submursible_name = "Nautilus" ; + + int temp(pdim0, pdim1) ; + temp:cf__standard_name = "cfsn-mmi__air_temperature" ; + //temp:cf__standard_name = "cfsn-nerc__CFSN0023"; + +// global attributes: + :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; + :rdf__type = "bald__Container" ; + :bald__isPrefixedBy = "prefix_list" ; +data: + + parent_variable = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + temp = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + +} diff --git a/lib/bald/tests/integration/test_cdl_v2.py b/lib/bald/tests/integration/test_cdl_v2.py new file mode 100644 index 0000000..a20a1d2 --- /dev/null +++ b/lib/bald/tests/integration/test_cdl_v2.py @@ -0,0 +1,40 @@ +import glob +import os +import subprocess +import unittest + +import netCDF4 +import numpy as np + +import bald +from bald.tests import BaldTestCase + + +class Test(BaldTestCase): + def setUp(self): + self.cdl_path = os.path.join(os.path.dirname(__file__), 'CDL') + + +def test_prefix_v2(self): + """Test prefix version 2 style """ + with self.temp_filename('.nc') as tfile: + cdl_file = os.path.join(self.cdl_path, 'array_prefix_v2.cdl') + print(cdl_file) + subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) + validation = bald.validate_netcdf(tfile) + exns = validation.exceptions() + self.assertTrue(validation.is_valid(), msg='{} != []'.format(exns)) + +setattr(Test, 'test_prefix_v2', test_prefix_v2) + +def test_prefix_v2_full(self): + """Test prefix version 2 style - full example""" + with self.temp_filename('.nc') as tfile: + cdl_file = os.path.join(self.cdl_path, 'array_prefix_v2_full.cdl') + print(cdl_file) + subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) + validation = bald.validate_netcdf(tfile) + exns = validation.exceptions() + self.assertTrue(validation.is_valid(), msg='{} != []'.format(exns)) + +setattr(Test, 'test_prefix_v2_full', test_prefix_v2_full) diff --git a/lib/bald/validation.py b/lib/bald/validation.py index 60a7b59..44ea115 100644 --- a/lib/bald/validation.py +++ b/lib/bald/validation.py @@ -90,9 +90,11 @@ def _check_uri(uri, exceptions): exceptions.append(msg) return exceptions + ''' Skip checking prefixes as whole graphs could be big! for pref, uri in self.subject.prefixes().items(): exceptions = _check_uri(self.subject.unpack_uri(uri), exceptions) + ''' for alias, uri in self.subject.aliases.items(): exceptions = _check_uri(self.subject.unpack_uri(uri), exceptions) From eed068768693425c02750a8628d3536623992436 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 30 Aug 2017 21:09:01 +1000 Subject: [PATCH 03/12] fixing bug in prefix handling --- lib/bald/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 0d5c301..048cd0b 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -550,7 +550,7 @@ def load_netcdf(afilepath, uri=None): prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if hasattr(fhandle, 'bald__isPrefixedBy') else {}) prefixes = {} - if prefix_var is not {} : + if prefix_var != {} : prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for prefix in prefix_var.ncattrs()])) else: From 8a64095cb85f4817703ceeeb42b8dcecba0ece7c Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 30 Aug 2017 21:25:53 +1000 Subject: [PATCH 04/12] fixing tests for cdl rdfgraph --- .../tests/integration/CDL/multi_array_reference.cdl | 12 +++++------- .../tests/integration/TTL/multi_array_reference.ttl | 5 +++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/bald/tests/integration/CDL/multi_array_reference.cdl b/lib/bald/tests/integration/CDL/multi_array_reference.cdl index ccf2e3f..fe4241e 100644 --- a/lib/bald/tests/integration/CDL/multi_array_reference.cdl +++ b/lib/bald/tests/integration/CDL/multi_array_reference.cdl @@ -3,6 +3,11 @@ dimensions: pdim0 = 11 ; pdim1 = 17 ; variables: + int prefix_list(pdim0, pdim1) ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + int variable1(pdim0, pdim1) ; variable1:bald__references = "location_variable" ; variable1:long_name = "Gerald"; @@ -31,11 +36,4 @@ variables: :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; :bald__isPrefixedBy = "prefix_list" ; -group: prefix_list { - - // group attributes: - :bald__ = "http://binary-array-ld.net/latest/" ; - :metce__ = "http://codes.wmo.int/common/observation-type/METCE/2013/" ; - :rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; - } // group bald__prefix_list } diff --git a/lib/bald/tests/integration/TTL/multi_array_reference.ttl b/lib/bald/tests/integration/TTL/multi_array_reference.ttl index e495087..f42a3c5 100644 --- a/lib/bald/tests/integration/TTL/multi_array_reference.ttl +++ b/lib/bald/tests/integration/TTL/multi_array_reference.ttl @@ -1,4 +1,5 @@ @prefix bald: . +@prefix metce: . @prefix rdf: . @prefix rdfs: . @prefix xml: . @@ -21,7 +22,7 @@ ; bald:shape "(11, 17)" ; "Gerald" ; - . + metce:SamplingObservation . a bald:Array ; bald:references , @@ -29,7 +30,7 @@ ; bald:shape "(11, 17)" ; "Imelda" ; - . + metce:SamplingObservation . a bald:Reference, bald:Subject ; From 2857d4fb72d87080e5e413af0ad18aacfa199e6d Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 30 Aug 2017 21:36:53 +1000 Subject: [PATCH 05/12] updating alias tests and cdl --- lib/bald/tests/integration/CDL/array_alias_v2.cdl | 2 +- lib/bald/tests/integration/test_cdl_v2.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/bald/tests/integration/CDL/array_alias_v2.cdl b/lib/bald/tests/integration/CDL/array_alias_v2.cdl index 4b6c2db..10fc484 100644 --- a/lib/bald/tests/integration/CDL/array_alias_v2.cdl +++ b/lib/bald/tests/integration/CDL/array_alias_v2.cdl @@ -10,7 +10,7 @@ variables: int alias_list ; alias_list:SDN_ParameterDiscoveryCode = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#SDN_ParameterDiscoveryCode" ; alias_list:BactTaxaAbundSed = "http://vocab.nerc.ac.uk/collection/P02/current/BAUC/" ; - alias_list:standard_name = "https://def.scitools.org.uk/CFTerms/standard_name" ; + alias_list:standard_name = "http://def.scitools.org.uk/CFTerms/standard_name" ; alias_list:air_temperature = "http://vocab.nerc.ac.uk/collection/P07/current/CFSN0023/" ; int parent_variable(pdim0, pdim1) ; diff --git a/lib/bald/tests/integration/test_cdl_v2.py b/lib/bald/tests/integration/test_cdl_v2.py index a20a1d2..fc5e030 100644 --- a/lib/bald/tests/integration/test_cdl_v2.py +++ b/lib/bald/tests/integration/test_cdl_v2.py @@ -15,6 +15,18 @@ def setUp(self): self.cdl_path = os.path.join(os.path.dirname(__file__), 'CDL') +def test_alias_v2(self): + """Test alias version 2 style """ + with self.temp_filename('.nc') as tfile: + cdl_file = os.path.join(self.cdl_path, 'array_alias_v2.cdl') + print(cdl_file) + subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) + validation = bald.validate_netcdf(tfile) + exns = validation.exceptions() + self.assertTrue(validation.is_valid(), msg='{} != []'.format(exns)) + +setattr(Test, 'test_alias_v2', test_alias_v2) + def test_prefix_v2(self): """Test prefix version 2 style """ with self.temp_filename('.nc') as tfile: From fcce72fdb922f0c0ccf95d9a0ce3c4dc47b9ad30 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 30 Aug 2017 23:21:28 +1000 Subject: [PATCH 06/12] updating OISST test --- lib/bald/tests/integration/test_cdl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/bald/tests/integration/test_cdl.py b/lib/bald/tests/integration/test_cdl.py index 92f0540..a7f87d1 100644 --- a/lib/bald/tests/integration/test_cdl.py +++ b/lib/bald/tests/integration/test_cdl.py @@ -64,7 +64,8 @@ def test_grid_OISST_GHRSST(self): validation = bald.validate_netcdf(tfile) exns = validation.exceptions() exns.sort() - expected = ['http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).', + expected = ['http://doi.org/10.7289/V5SQ8XB5 is not resolving as a resource (404).', + 'http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).', 'http://www.ncdc.noaa.gov/sst/ is not resolving as a resource (404).'] expected.sort() self.assertTrue(not validation.is_valid() and exns == expected, From 847cee98d7f0859212f9a68a97d1ee53eab0a42d Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Mon, 4 Sep 2017 14:33:59 +1000 Subject: [PATCH 07/12] adding default prefixes in bald lib --- lib/bald/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 048cd0b..9cfbae7 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -558,6 +558,13 @@ def load_netcdf(afilepath, uri=None): if k.endswith('__'): prefixes[k] = getattr(fhandle, k) + # check that default set is handled, i.e. bald__ and rdf__ + if 'bald__' not in prefixes: + prefixes['bald__'] = "http://binary-array-ld.net/latest/" + + if 'rdf__' not in prefixes: + prefixes['rdf__'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + #print(prefixes) alias_var_name = None From 1745701ba210b0c38191eff3a4e88aea5e514168 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Mon, 4 Sep 2017 15:10:50 +1000 Subject: [PATCH 08/12] implementing parameters to set graph uris and base uris --- lib/bald/__init__.py | 9 +++++++-- nc2rdf/nc2rdf.py | 14 ++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 9cfbae7..4f103d7 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -536,7 +536,7 @@ def load(afilepath): finally: f.close() -def load_netcdf(afilepath, uri=None): +def load_netcdf(afilepath, uri=None, baseuri=None): """ Validate a file with respect to binary-array-linked-data. Returns a :class:`bald.validation.Validation` @@ -600,11 +600,14 @@ def load_netcdf(afilepath, uri=None): sattrs = fhandle.variables[name].__dict__.copy() # inconsistent use of '/'; fix it identity = name + if baseuri is not None: + identity = baseuri + "/" + name # netCDF coordinate variable special case if (len(fhandle.variables[name].dimensions) == 1 and fhandle.variables[name].dimensions[0] == name): - sattrs['bald__array'] = name + #sattrs['bald__array'] = name + sattrs['bald__array'] = identity sattrs['rdf__type'] = 'bald__Reference' if fhandle.variables[name].shape: @@ -648,6 +651,8 @@ def load_netcdf(afilepath, uri=None): # Else, define a bald:childBroadcast else: identity = '{}_{}_ref'.format(name, dim) + if baseuri is not None: + identity = baseuri + '/' + '{}_{}_ref'.format(name, dim) rattrs = {} rattrs['rdf__type'] = 'bald__Reference' reshape = [1 for adim in var_shape] diff --git a/nc2rdf/nc2rdf.py b/nc2rdf/nc2rdf.py index 120edac..2f3ab82 100644 --- a/nc2rdf/nc2rdf.py +++ b/nc2rdf/nc2rdf.py @@ -6,21 +6,21 @@ import numpy as np import bald -def nc2rdf(ncfilename, outformat): +def nc2rdf(ncfilename, outformat, container_uri=None, default_baseuri=None): #print("nc2rdf test") #print(ncfile) - root_container = bald.load_netcdf(ncfilename) + root_container = bald.load_netcdf(ncfilename, uri=container_uri, baseuri=default_baseuri) ttl = root_container.rdfgraph().serialize(format=outformat).decode("utf-8") print(ttl) -def cdl2rdf(cdl_file, outformat): +def cdl2rdf(cdl_file, outformat, container_uri=None, default_baseuri=None): #print("cdl2rdf test") #print(cdl_file) tfile, tfilename = tempfile.mkstemp('.nc') #print(tfilename) subprocess.check_call(['ncgen', '-o', tfilename, cdl_file]) - nc2rdf(tfilename, outformat) + nc2rdf(tfilename, outformat, container_uri=container_uri, default_baseuri=default_baseuri) os.close(tfile) os.remove(tfilename) @@ -29,6 +29,8 @@ def cdl2rdf(cdl_file, outformat): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Convert netCDF metadata to RDF.') parser.add_argument('-o', action="store", dest="format", default='n3', help="RDF output format (n3 *default, ttl, xml)") + parser.add_argument('--uri', action="store", dest="uri", help="Container URI for the root of the graph") + parser.add_argument('--baseuri', action="store", dest="baseuri", help="Base URI for the graph") parser.add_argument('--cdl', action="store_true", dest="isCDL", default=False, help="Flag to indicate file is CDL") parser.add_argument('--nc', action="store_true", dest="isNC", default=False, help="Flag to indicate file is netCDF") parser.add_argument("ncfile", help="Path for the netCDF file") @@ -36,8 +38,8 @@ def cdl2rdf(cdl_file, outformat): args = parser.parse_args() if(args.isCDL or args.ncfile.endswith(".cdl") or args.ncfile.endswith('.CDL')): - cdl2rdf(args.ncfile, args.format) + cdl2rdf(args.ncfile, args.format, container_uri=args.uri, default_baseuri=args.baseuri) elif(args.isNC or args.ncfile.endswith(".nc") or args.ncfile.endswith('.NC')): - nc2rdf(args.ncfile, args.format) + nc2rdf(args.ncfile, args.format, container_uri=args.uri, default_baseuri=args.baseuri) else: print("Unrecognised file suffix. Please indicate if CDL or NC via --cdl or --nc"); From 9d2f96c9b48c7ca294ef6bb6282250c4f7a3a2a1 Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 6 Sep 2017 11:54:41 +1000 Subject: [PATCH 09/12] fixing refs to alias_group and prefix_group to vars --- lib/bald/__init__.py | 16 +- .../tests/integration/CDL/array_alias.cdl | 37 +--- .../tests/integration/CDL/array_reference.cdl | 37 +--- ncldDump/aliases.json | 190 +++++++++--------- 4 files changed, 119 insertions(+), 161 deletions(-) diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index 4f103d7..a25653e 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -550,9 +550,13 @@ def load_netcdf(afilepath, uri=None, baseuri=None): prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if hasattr(fhandle, 'bald__isPrefixedBy') else {}) prefixes = {} - if prefix_var != {} : + + skipped_variables = [] + if prefix_var != {}: prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for prefix in prefix_var.ncattrs()])) + if isinstance(prefix_var, netCDF4._netCDF4.Variable): + skipped_variables.append(prefix_var.name) else: for k in fhandle.ncattrs(): if k.endswith('__'): @@ -577,7 +581,8 @@ def load_netcdf(afilepath, uri=None, baseuri=None): if alias_var != {}: aliases = (dict([(alias, getattr(alias_var, alias)) for alias in alias_var.ncattrs()])) - #print(aliases) + if isinstance(alias_var, netCDF4._netCDF4.Variable): + skipped_variables.append(alias_var.name) attrs = {} for k in fhandle.ncattrs(): @@ -609,13 +614,16 @@ def load_netcdf(afilepath, uri=None, baseuri=None): #sattrs['bald__array'] = name sattrs['bald__array'] = identity sattrs['rdf__type'] = 'bald__Reference' - if fhandle.variables[name].shape: sattrs['bald__shape'] = fhandle.variables[name].shape var = Array(identity, sattrs, prefixes=prefixes, aliases=aliases) else: var = Subject(identity, sattrs, prefixes=prefixes, aliases=aliases) - root_container.attrs['bald__contains'].append(var) + if name not in skipped_variables: + # Don't include skipped variables, such as prefix or alias + # variables, within the containment relation. + root_container.attrs['bald__contains'].append(var) + file_variables[name] = var diff --git a/lib/bald/tests/integration/CDL/array_alias.cdl b/lib/bald/tests/integration/CDL/array_alias.cdl index c442cb6..2576188 100644 --- a/lib/bald/tests/integration/CDL/array_alias.cdl +++ b/lib/bald/tests/integration/CDL/array_alias.cdl @@ -8,38 +8,19 @@ variables: parent_variable:SDN_ParameterDiscoveryCode = "BactTaxaAbundSed" ; parent_variable:submursible_name = "Nautilus" ; + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + + int alias_list ; + alias_list:SDN_ParameterDiscoveryCode = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#SDN_ParameterDiscoveryCode" ; + alias_list:BactTaxaAbundSed = "http://vocab.nerc.ac.uk/collection/P02/current/BAUC/" ; + + // global attributes: :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; :rdf__type = "bald__Container" ; :bald__isPrefixedBy = "prefix_list" ; :bald__isAliasedBy = "alias_list" ; -data: - - parent_variable = - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; - - -group: prefix_list { - - // group attributes: - :bald__ = "http://binary-array-ld.net/latest/" ; - :rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; - } // group bald__prefix_list - -group: alias_list { - // group attributes: - :SDN_ParameterDiscoveryCode = "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#SDN_ParameterDiscoveryCode" ; - :BactTaxaAbundSed = "http://vocab.nerc.ac.uk/collection/P02/current/BAUC/" ; - } // group bald__alias_list } diff --git a/lib/bald/tests/integration/CDL/array_reference.cdl b/lib/bald/tests/integration/CDL/array_reference.cdl index 9f7913a..3c2b072 100644 --- a/lib/bald/tests/integration/CDL/array_reference.cdl +++ b/lib/bald/tests/integration/CDL/array_reference.cdl @@ -9,43 +9,12 @@ variables: int child_variable(pdim0, pdim1) ; child_variable:rdf__type = "bald__Reference" ; child_variable:bald__array = "child_variable" ; + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; // global attributes: :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; :rdf__type = "bald__Container" ; :bald__isPrefixedBy = "prefix_list" ; -data: - - parent_variable = - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; - - child_variable = - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, - _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; - -group: prefix_list { - - // group attributes: - :bald__ = "http://binary-array-ld.net/latest/" ; - :rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; - } // group bald__prefix_list } diff --git a/ncldDump/aliases.json b/ncldDump/aliases.json index 1af38c0..f31bb57 100644 --- a/ncldDump/aliases.json +++ b/ncldDump/aliases.json @@ -6,101 +6,101 @@ }, "names" : { - "Conventions" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#Conventions", - "_FillValue" : "http://www.unidata.ucar.edu/netcdf/docs/netcdf.html#Attribute-Conventions", - "acknowledgement" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#acknowledgement", - "add_offset" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#packed-data", - "ancillary_variables" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#ancillary-data", - "axis" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#coordinate-types", - "bounds" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-boundaries", - "calendar" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#calendar", - "cdm_data_type" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#cdm_data_type", - "cell_measures" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-measures", - "cell_methods" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#cell-methods", - "cf_role" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#coordinates-metadata", - "climatology" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#climatological-statistics", - "comment" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#comment", - "compress" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#reduced-horizontal-grid", - "contributor_name" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#contributor_name", - "contributor_role" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#contributor_role", - "coordinates" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#coordinate-system", - "coverage_content_type" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#coverage_content_type", - "creator_email" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#creator_email", - "creator_institution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#creator_institution", - "creator_name" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#creator_name", - "creator_type" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#creator_type", - "creator_url" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#creator_url", - "date_created" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#date_created", - "date_issued" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#date_issued", - "date_metadata_modified" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#date_metadata_modified", - "date_modified" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#date_modified", - "featureType" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#featureType", - "flag_masks" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#flags", - "flag_meanings" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#flags", - "flag_values" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#flags", - "formula_terms" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#dimensionless-vertical-coordinate", - "geospatial_bounds" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds", - "geospatial_bounds_crs" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds_crs", - "geospatial_bounds_vertical_crs" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_bounds_vertical_crs", - "geospatial_lat_max" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lat_max", - "geospatial_lat_min" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lat_min", - "geospatial_lat_resolution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lat_resolution", - "geospatial_lat_units" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lat_units", - "geospatial_lon_max" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lon_max", - "geospatial_lon_min" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lon_min", - "geospatial_lon_resolution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lon_resolution", - "geospatial_lon_units" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_lon_units", - "geospatial_vertical_max" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_vertical_max", - "geospatial_vertical_min" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_vertical_min", - "geospatial_vertical_positive" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_vertical_positive", - "geospatial_vertical_resolution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_vertical_resolution", - "geospatial_vertical_units" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#geospatial_vertical_units", - "grid_mapping" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#grid-mappings-and-projections", - "history" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#history", - "id" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#id", - "instance_dimension" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#representations-features", - "institution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#institution", - "instrument" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#instrument", - "instrument_vocabulary" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#instrument_vocabulary", - "keywords" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#keywords", - "keywords_vocabulary" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#keywords_vocabulary", - "leap_month" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#calendar", - "leap_year" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#calendar", - "license" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#license", - "long_name" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#long-name", - "metadata_link" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#metadata_link", - "missing_value" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#missing-data", - "month_lengths" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#calendar", - "naming_authority" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#naming_authority", - "platform" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#platform", - "platform_vocabulary" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#platform_vocabulary", - "positive" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#COARDS", - "processing_level" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#processing_level", - "product_version" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#product_version", - "program" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#program", - "project" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#project", - "publisher_email" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#publisher_email", - "publisher_institution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#publisher_institution", - "publisher_name" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#publisher_name", - "publisher_type" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#publisher_type", - "publisher_url" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#publisher_url", - "references" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#references", - "sample_dimension" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#representations-features", - "scale_factor" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#packed-data", - "source" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#source", - "standard_error_multiplier" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#standard-name-modifiers", - "standard_name" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#standard-name", - "standard_name_vocabulary" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#standard_name_vocabulary", - "summary" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#summary", - "time_coverage_duration" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#time_coverage_duration", - "time_coverage_end" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#time_coverage_end", - "time_coverage_resolution" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#time_coverage_resolution", - "time_coverage_start" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#time_coverage_start", - "title" : "http://wiki.esipfed.org/index.php/Attribute_Convention_for_Data_Discovery_1-3#title", - "units" : "http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#units", - "valid_max" : "http://www.unidata.ucar.edu/netcdf/docs/netcdf.html#Attribute-Conventions", - "valid_min" : "http://www.unidata.ucar.edu/netcdf/docs/netcdf.html#Attribute-Conventions", - "valid_range" : "http://www.unidata.ucar.edu/netcdf/docs/netcdf.html#Attribute-Conventions", + "Conventions" : "https://def.scitools.org.uk/ACDD/{}", + "_FillValue" : "https://def.scitools.org.uk/NetCDF/{}", + "acknowledgement" : "https://def.scitools.org.uk/ACDD/{}", + "add_offset" : "https://def.scitools.org.uk/CFTerms/{}", + "ancillary_variables" : "https://def.scitools.org.uk/CFTerms/{}", + "axis" : "https://def.scitools.org.uk/CFTerms/{}", + "bounds" : "https://def.scitools.org.uk/CFTerms/{}", + "calendar" : "https://def.scitools.org.uk/CFTerms/{}", + "cdm_data_type" : "https://def.scitools.org.uk/ACDD/{}", + "cell_measures" : "https://def.scitools.org.uk/CFTerms/{}", + "cell_methods" : "https://def.scitools.org.uk/CFTerms/{}", + "cf_role" : "https://def.scitools.org.uk/CFTerms/{}", + "climatology" : "https://def.scitools.org.uk/CFTerms/{}", + "comment" : "https://def.scitools.org.uk/ACDD/{}", + "compress" : "https://def.scitools.org.uk/CFTerms/{}", + "contributor_name" : "https://def.scitools.org.uk/ACDD/{}", + "contributor_role" : "https://def.scitools.org.uk/ACDD/{}", + "coordinates" : "https://def.scitools.org.uk/CFTerms/{}", + "coverage_content_type" : "https://def.scitools.org.uk/ACDD/{}", + "creator_email" : "https://def.scitools.org.uk/ACDD/{}", + "creator_institution" : "https://def.scitools.org.uk/ACDD/{}", + "creator_name" : "https://def.scitools.org.uk/ACDD/{}", + "creator_type" : "https://def.scitools.org.uk/ACDD/{}", + "creator_url" : "https://def.scitools.org.uk/ACDD/{}", + "date_created" : "https://def.scitools.org.uk/ACDD/{}", + "date_issued" : "https://def.scitools.org.uk/ACDD/{}", + "date_metadata_modified" : "https://def.scitools.org.uk/ACDD/{}", + "date_modified" : "https://def.scitools.org.uk/ACDD/{}", + "featureType" : "https://def.scitools.org.uk/CFTerms/{}", + "flag_masks" : "https://def.scitools.org.uk/CFTerms/{}", + "flag_meanings" : "https://def.scitools.org.uk/CFTerms/{}", + "flag_values" : "https://def.scitools.org.uk/CFTerms/{}", + "formula_terms" : "https://def.scitools.org.uk/CFTerms/{}", + "geospatial_bounds" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_bounds_crs" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_bounds_vertical_crs" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lat_max" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lat_min" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lat_resolution" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lat_units" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lon_max" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lon_min" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lon_resolution" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_lon_units" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_vertical_max" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_vertical_min" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_vertical_positive" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_vertical_resolution" : "https://def.scitools.org.uk/ACDD/{}", + "geospatial_vertical_units" : "https://def.scitools.org.uk/ACDD/{}", + "grid_mapping" : "https://def.scitools.org.uk/CFTerms/{}", + "history" : "https://def.scitools.org.uk/ACDD/{}", + "id" : "https://def.scitools.org.uk/ACDD/{}", + "instance_dimension" : "https://def.scitools.org.uk/CFTerms/{}", + "institution" : "https://def.scitools.org.uk/ACDD/{}", + "instrument" : "https://def.scitools.org.uk/ACDD/{}", + "instrument_vocabulary" : "https://def.scitools.org.uk/ACDD/{}", + "keywords" : "https://def.scitools.org.uk/ACDD/{}", + "keywords_vocabulary" : "https://def.scitools.org.uk/ACDD/{}", + "leap_month" : "https://def.scitools.org.uk/CFTerms/{}", + "leap_year" : "https://def.scitools.org.uk/CFTerms/{}", + "license" : "https://def.scitools.org.uk/ACDD/{}", + "long_name" : "https://def.scitools.org.uk/CFTerms/{}", + "metadata_link" : "https://def.scitools.org.uk/ACDD/{}", + "missing_value" : "https://def.scitools.org.uk/CFTerms/{}", + "month_lengths" : "https://def.scitools.org.uk/CFTerms/{}", + "naming_authority" : "https://def.scitools.org.uk/ACDD/{}", + "platform" : "https://def.scitools.org.uk/ACDD/{}", + "platform_vocabulary" : "https://def.scitools.org.uk/ACDD/{}", + "positive" : "https://def.scitools.org.uk/CFTerms/{}", + "processing_level" : "https://def.scitools.org.uk/ACDD/{}", + "product_version" : "https://def.scitools.org.uk/ACDD/{}", + "program" : "https://def.scitools.org.uk/ACDD/{}", + "project" : "https://def.scitools.org.uk/ACDD/{}", + "publisher_email" : "https://def.scitools.org.uk/ACDD/{}", + "publisher_institution" : "https://def.scitools.org.uk/ACDD/{}", + "publisher_name" : "https://def.scitools.org.uk/ACDD/{}", + "publisher_type" : "https://def.scitools.org.uk/ACDD/{}", + "publisher_url" : "https://def.scitools.org.uk/ACDD/{}", + "references" : "https://def.scitools.org.uk/ACDD/{}", + "sample_dimension" : "https://def.scitools.org.uk/CFTerms/{}", + "scale_factor" : "https://def.scitools.org.uk/CFTerms/{}", + "source" : "https://def.scitools.org.uk/ACDD/{}", + "standard_error_multiplier" : "https://def.scitools.org.uk/CFTerms/{}", + "standard_name" : "https://def.scitools.org.uk/CFTerms/{}", + "standard_name_vocabulary" : "https://def.scitools.org.uk/ACDD/{}", + "summary" : "https://def.scitools.org.uk/ACDD/{}", + "time_coverage_duration" : "https://def.scitools.org.uk/ACDD/{}", + "time_coverage_end" : "https://def.scitools.org.uk/ACDD/{}", + "time_coverage_resolution" : "https://def.scitools.org.uk/ACDD/{}", + "time_coverage_start" : "https://def.scitools.org.uk/ACDD/{}", + "title" : "https://def.scitools.org.uk/ACDD/{}", + "units" : "https://def.scitools.org.uk/CFTerms/{}", + "valid_max" : "https://def.scitools.org.uk/NetCDF/{}", + "valid_min" : "https://def.scitools.org.uk/NetCDF/{}", + "valid_range" : "https://def.scitools.org.uk/NetCDF/{}", "SDN_ParameterDiscoveryCode" : "http://vocab.nerc.ac.uk/isoCodelists/sdnCodelists/cdicsrCodeList.xml#{}", "unit_id" : "http://qudt.org/1.1/schema/qudt#unit", "medium_id" : "http://environment.data.gov.au/def/op#matrix", From cd88109fb7fc95fd40cf010f33335ee03be6938e Mon Sep 17 00:00:00 2001 From: Jonathan Yu Date: Wed, 6 Sep 2017 15:29:37 +1000 Subject: [PATCH 10/12] updating tests to reflect URL checks --- lib/bald/tests/integration/test_cdl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/bald/tests/integration/test_cdl.py b/lib/bald/tests/integration/test_cdl.py index a7f87d1..03b2d50 100644 --- a/lib/bald/tests/integration/test_cdl.py +++ b/lib/bald/tests/integration/test_cdl.py @@ -64,8 +64,7 @@ def test_grid_OISST_GHRSST(self): validation = bald.validate_netcdf(tfile) exns = validation.exceptions() exns.sort() - expected = ['http://doi.org/10.7289/V5SQ8XB5 is not resolving as a resource (404).', - 'http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).', + expected = [ 'http://www.ncdc.noaa.gov/sst is not resolving as a resource (404).', 'http://www.ncdc.noaa.gov/sst/ is not resolving as a resource (404).'] expected.sort() self.assertTrue(not validation.is_valid() and exns == expected, From a8c4a5b2ae29e8d3f9a415058e478cd5d005972e Mon Sep 17 00:00:00 2001 From: Jonathan Yu <4723726+jyucsiro@users.noreply.github.com> Date: Thu, 7 Sep 2017 04:29:44 +1000 Subject: [PATCH 11/12] removing bug --- lib/bald/tests/integration/test_cdl_rdfgraph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/bald/tests/integration/test_cdl_rdfgraph.py b/lib/bald/tests/integration/test_cdl_rdfgraph.py index 2601881..be73efe 100644 --- a/lib/bald/tests/integration/test_cdl_rdfgraph.py +++ b/lib/bald/tests/integration/test_cdl_rdfgraph.py @@ -42,7 +42,6 @@ def test_ereefs(self): cdl_file = os.path.join(self.cdl_path, 'ereefs_gbr4_ncld.cdl') subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) root_container = bald.load_netcdf(tfile) - testPassed try: ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") except TypeError: From 0999f88c2c42e376162125bc3a2bb2566f9bda78 Mon Sep 17 00:00:00 2001 From: Jonathan Yu <4723726+jyucsiro@users.noreply.github.com> Date: Thu, 7 Sep 2017 06:49:45 +1000 Subject: [PATCH 12/12] fixing bald lib to convert np.ndarray to list --- lib/bald/__init__.py | 25 ++++++----- lib/bald/tests/integration/CDL/array_geo.cdl | 45 +++++++++++++++++++ .../tests/integration/test_cdl_rdfgraph.py | 3 +- 3 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 lib/bald/tests/integration/CDL/array_geo.cdl diff --git a/lib/bald/__init__.py b/lib/bald/__init__.py index a25653e..c421292 100644 --- a/lib/bald/__init__.py +++ b/lib/bald/__init__.py @@ -415,6 +415,12 @@ def rdfnode(self, graph): selfnode = rdflib.URIRef(self.identity) for attr in self.attrs: objs = self.attrs[attr] + if(isinstance(objs, np.ndarray)): + #print("Found np.ndarray") + #print(objs) + #print(attr) + #try to convert np.ndarray to a list + objs = objs.tolist() if not (isinstance(objs, set) or isinstance(objs, list)): objs = set([objs]) for obj in objs: @@ -442,6 +448,9 @@ def rdfgraph(self): for prefix_name in self._prefixes: #strip the double underscore suffix new_name = prefix_name[:-2] + #print(prefix_name) + #print(new_name) + #print(self._prefixes[prefix_name]) graph.bind(new_name, self._prefixes[prefix_name]) graph = self.rdfnode(graph) @@ -550,13 +559,9 @@ def load_netcdf(afilepath, uri=None, baseuri=None): prefix_var = (fhandle[fhandle.bald__isPrefixedBy] if hasattr(fhandle, 'bald__isPrefixedBy') else {}) prefixes = {} - - skipped_variables = [] - if prefix_var != {}: + if prefix_var != {} : prefixes = (dict([(prefix, getattr(prefix_var, prefix)) for prefix in prefix_var.ncattrs()])) - if isinstance(prefix_var, netCDF4._netCDF4.Variable): - skipped_variables.append(prefix_var.name) else: for k in fhandle.ncattrs(): if k.endswith('__'): @@ -581,8 +586,7 @@ def load_netcdf(afilepath, uri=None, baseuri=None): if alias_var != {}: aliases = (dict([(alias, getattr(alias_var, alias)) for alias in alias_var.ncattrs()])) - if isinstance(alias_var, netCDF4._netCDF4.Variable): - skipped_variables.append(alias_var.name) + #print(aliases) attrs = {} for k in fhandle.ncattrs(): @@ -614,16 +618,13 @@ def load_netcdf(afilepath, uri=None, baseuri=None): #sattrs['bald__array'] = name sattrs['bald__array'] = identity sattrs['rdf__type'] = 'bald__Reference' + if fhandle.variables[name].shape: sattrs['bald__shape'] = fhandle.variables[name].shape var = Array(identity, sattrs, prefixes=prefixes, aliases=aliases) else: var = Subject(identity, sattrs, prefixes=prefixes, aliases=aliases) - if name not in skipped_variables: - # Don't include skipped variables, such as prefix or alias - # variables, within the containment relation. - root_container.attrs['bald__contains'].append(var) - + root_container.attrs['bald__contains'].append(var) file_variables[name] = var diff --git a/lib/bald/tests/integration/CDL/array_geo.cdl b/lib/bald/tests/integration/CDL/array_geo.cdl new file mode 100644 index 0000000..70585c0 --- /dev/null +++ b/lib/bald/tests/integration/CDL/array_geo.cdl @@ -0,0 +1,45 @@ +netcdf tmpMwXy8U { +dimensions: + pdim0 = 11 ; + pdim1 = 17 ; +variables: + int prefix_list ; + prefix_list:bald__ = "http://binary-array-ld.net/latest/" ; + prefix_list:rdf__ = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" ; + prefix_list:rdfs__ = "http://www.w3.org/2000/01/rdf-schema#" ; + prefix_list:cf__ = "http://def.scitools.org.uk/CFTerms/" ; + prefix_list:geo__ = "http://www.opengis.net/ont/geosparql#" ; + + int temp(pdim0, pdim1) ; + temp:cf__standard_name = "air_temperature" ; + temp:cf__long_name = "Air temperature obs example at point" ; + temp:rdfs__label = "Air temperature obs example at point" ; + temp:geo__asWKT = "POINT(-77.03524 38.889468)" ; + + int pressure(pdim0, pdim1) ; + pressure:cf__standard_name = "air_pressure" ; + pressure:cf__long_name = "Air pressure at UCAR Centre Green" ; + pressure:rdfs__label = "Air pressure at UCAR Centre Green" ; + pressure:geo__asWKT = "POINT(-105.24584700000003 40.0315278)" ; + +// global attributes: + :_NCProperties = "version=1|netcdflibversion=4.4.1|hdf5libversion=1.8.17" ; + :rdf__type = "bald__Container" ; + :bald__isPrefixedBy = "prefix_list" ; +data: + + temp = + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ ; + + +} diff --git a/lib/bald/tests/integration/test_cdl_rdfgraph.py b/lib/bald/tests/integration/test_cdl_rdfgraph.py index be73efe..ca54b07 100644 --- a/lib/bald/tests/integration/test_cdl_rdfgraph.py +++ b/lib/bald/tests/integration/test_cdl_rdfgraph.py @@ -43,7 +43,8 @@ def test_ereefs(self): subprocess.check_call(['ncgen', '-o', tfile, cdl_file]) root_container = bald.load_netcdf(tfile) try: - ttl = root_container.rdfgraph().serialize(format='n3').decode("utf-8") + g = root_container.rdfgraph() + ttl = g.serialize(format='n3').decode("utf-8") except TypeError: self.fail("Test case could not convert ereefs CDL to RDF")