From bf960613e0004fbb06a79383223a6caa8f4c90be Mon Sep 17 00:00:00 2001
From: GlassOfWhiskey
Date: Sun, 12 Feb 2023 01:54:03 +0100
Subject: [PATCH] Added flatten directive to ArraySchema

---
 schema_salad/avro/schema.py                 | 16 +++-
 schema_salad/metaschema.py                  | 71 +++++++++++++++------
 schema_salad/metaschema/metaschema_base.yml |  7 ++
 schema_salad/python_codegen.py              |  7 +-
 schema_salad/python_codegen_support.py      |  6 +-
 schema_salad/schema.py                      |  1 +
 schema_salad/tests/cwl-pre.yml              | 23 +++++--
 schema_salad/tests/metaschema-pre.yml       | 13 ++++
 schema_salad/tests/test_makedoc.py          |  2 +-
 schema_salad/tests/test_schema/Base.yml     | 10 ++-
 schema_salad/tests/test_schema/Process.yml  |  2 +-
 schema_salad/tests/test_schema/Workflow.yml |  9 +--
 12 files changed, 128 insertions(+), 39 deletions(-)

diff --git a/schema_salad/avro/schema.py b/schema_salad/avro/schema.py
index d801fb3c..6ef6cd04 100644
--- a/schema_salad/avro/schema.py
+++ b/schema_salad/avro/schema.py
@@ -395,7 +395,11 @@ def symbols(self) -> List[str]:
 
 class ArraySchema(Schema):
     def __init__(
-        self, items: JsonDataType, names: Names, other_props: Optional[PropsType] = None
+        self,
+        items: JsonDataType,
+        names: Names,
+        flatten: Optional[bool] = True,
+        other_props: Optional[PropsType] = None,
     ) -> None:
         # Call parent ctor
         Schema.__init__(self, "array", other_props)
@@ -415,12 +419,19 @@ def __init__(
             ) from err
 
         self.set_prop("items", items_schema)
+        self.set_prop("flatten", flatten)
 
     # read-only properties
     @property
     def items(self) -> Schema:
         return cast(Schema, self.get_prop("items"))
 
+    @property
+    def flatten(self) -> bool:
+        """Return the flatten flag; an unset or null value means true."""
+        value = self.get_prop("flatten")
+        return True if value is None else cast(bool, value)
+
 
 class MapSchema(Schema):
     def __init__(
@@ -681,7 +692,8 @@ def make_avsc_object(json_data: JsonDataType, names: Optional[Names] = None) ->
         if atype in VALID_TYPES:
             if atype == "array":
                 items = json_data.get("items")
-                return ArraySchema(items, names, other_props)
+                flatten = json_data.get("flatten")
+                return ArraySchema(items, names, flatten, other_props)
             elif atype
== "map": name = json_data.get("name") namespace = json_data.get("namespace", names.default_namespace) diff --git a/schema_salad/metaschema.py b/schema_salad/metaschema.py index 0368e876..ef29a9f5 100644 --- a/schema_salad/metaschema.py +++ b/schema_salad/metaschema.py @@ -382,9 +382,9 @@ def __repr__(self): # type: () -> str class _ArrayLoader(_Loader): - def __init__(self, items): - # type: (_Loader) -> None + def __init__(self, items: _Loader, flatten: bool = True) -> None: self.items = items + self.flatten = flatten def load(self, doc, baseuri, loadingOptions, docRoot=None): # type: (Any, str, LoadingOptions, Optional[str]) -> Any @@ -395,7 +395,7 @@ def load(self, doc, baseuri, loadingOptions, docRoot=None): for i in range(0, len(doc)): try: lf = load_field(doc[i], _UnionLoader((self, self.items)), baseuri, loadingOptions) - if isinstance(lf, MutableSequence): + if self.flatten and isinstance(lf, MutableSequence): r.extend(lf) else: r.append(lf) @@ -1365,6 +1365,7 @@ def __init__( self, items: Any, type: Any, + flatten: Optional[Any] = None, extension_fields: Optional[Dict[str, Any]] = None, loadingOptions: Optional[LoadingOptions] = None, ) -> None: @@ -1377,16 +1378,21 @@ def __init__( self.loadingOptions = loadingOptions else: self.loadingOptions = LoadingOptions() + self.flatten = flatten self.items = items self.type = type def __eq__(self, other: Any) -> bool: if isinstance(other, ArraySchema): - return bool(self.items == other.items and self.type == other.type) + return bool( + self.flatten == other.flatten + and self.items == other.items + and self.type == other.type + ) return False def __hash__(self) -> int: - return hash((self.items, self.type)) + return hash((self.flatten, self.items, self.type)) @classmethod def fromDoc( @@ -1401,6 +1407,24 @@ def fromDoc( _doc.lc.data = doc.lc.data _doc.lc.filename = doc.lc.filename _errors__ = [] + if "flatten" in _doc: + try: + flatten = load_field( + _doc.get("flatten"), + 
uri_union_of_None_type_or_booltype_False_True_2, + baseuri, + loadingOptions, + ) + except ValidationException as e: + _errors__.append( + ValidationException( + "the `flatten` field is not valid because:", + SourceLine(_doc, "flatten", str), + [e], + ) + ) + else: + flatten = None try: items = load_field( _doc.get("items"), @@ -1442,7 +1466,7 @@ def fromDoc( else: _errors__.append( ValidationException( - "invalid field `{}`, expected one of: `items`, `type`".format( + "invalid field `{}`, expected one of: `flatten`, `items`, `type`".format( k ), SourceLine(_doc, k, str), @@ -1453,6 +1477,7 @@ def fromDoc( if _errors__: raise ValidationException("Trying 'ArraySchema'", None, _errors__) _constructed = cls( + flatten=flatten, items=items, type=type, extension_fields=extension_fields, @@ -1471,6 +1496,9 @@ def save( else: for ef in self.extension_fields: r[ef] = self.extension_fields[ef] + if self.flatten is not None: + u = save_relative_uri(self.flatten, base_url, False, 2, relative_uris) + r["flatten"] = u if self.items is not None: u = save_relative_uri(self.items, base_url, False, 2, relative_uris) r["items"] = u @@ -1487,7 +1515,7 @@ def save( r["$schemas"] = self.loadingOptions.schemas return r - attrs = frozenset(["items", "type"]) + attrs = frozenset(["flatten", "items", "type"]) class MapSchema(Saveable): @@ -4619,7 +4647,7 @@ def save( SaladMapSchemaLoader = _RecordLoader(SaladMapSchema) SaladUnionSchemaLoader = _RecordLoader(SaladUnionSchema) DocumentationLoader = _RecordLoader(Documentation) -array_of_strtype = _ArrayLoader(strtype) +array_of_strtype = _ArrayLoader(strtype, True) union_of_None_type_or_strtype_or_array_of_strtype = _UnionLoader( ( None_type, @@ -4640,7 +4668,8 @@ def save( ) ) array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype = _ArrayLoader( - 
union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype + union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype, + True, ) union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype = _UnionLoader( ( @@ -4658,7 +4687,7 @@ def save( union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype, 2, ) -array_of_RecordFieldLoader = _ArrayLoader(RecordFieldLoader) +array_of_RecordFieldLoader = _ArrayLoader(RecordFieldLoader, True) union_of_None_type_or_array_of_RecordFieldLoader = _UnionLoader( ( None_type, @@ -4682,6 +4711,15 @@ def save( uri_array_of_strtype_True_False_None = _URILoader(array_of_strtype, True, False, None) Enum_nameLoader = _EnumLoader(("enum",), "Enum_name") typedsl_Enum_nameLoader_2 = _TypeDSLLoader(Enum_nameLoader, 2) +union_of_None_type_or_booltype = _UnionLoader( + ( + None_type, + booltype, + ) +) +uri_union_of_None_type_or_booltype_False_True_2 = _URILoader( + union_of_None_type_or_booltype, False, True, 2 +) uri_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype_False_True_2 = _URILoader( 
union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype_or_array_of_union_of_PrimitiveTypeLoader_or_RecordSchemaLoader_or_EnumSchemaLoader_or_ArraySchemaLoader_or_MapSchemaLoader_or_UnionSchemaLoader_or_strtype, False, @@ -4694,12 +4732,6 @@ def save( typedsl_Map_nameLoader_2 = _TypeDSLLoader(Map_nameLoader, 2) Union_nameLoader = _EnumLoader(("union",), "Union_name") typedsl_Union_nameLoader_2 = _TypeDSLLoader(Union_nameLoader, 2) -union_of_None_type_or_booltype = _UnionLoader( - ( - None_type, - booltype, - ) -) union_of_None_type_or_inttype = _UnionLoader( ( None_type, @@ -4726,7 +4758,7 @@ def save( Any_type, ) ) -array_of_SaladRecordFieldLoader = _ArrayLoader(SaladRecordFieldLoader) +array_of_SaladRecordFieldLoader = _ArrayLoader(SaladRecordFieldLoader, True) union_of_None_type_or_array_of_SaladRecordFieldLoader = _UnionLoader( ( None_type, @@ -4739,7 +4771,7 @@ def save( uri_union_of_None_type_or_strtype_or_array_of_strtype_False_False_1 = _URILoader( union_of_None_type_or_strtype_or_array_of_strtype, False, False, 1 ) -array_of_SpecializeDefLoader = _ArrayLoader(SpecializeDefLoader) +array_of_SpecializeDefLoader = _ArrayLoader(SpecializeDefLoader, True) union_of_None_type_or_array_of_SpecializeDefLoader = _UnionLoader( ( None_type, @@ -4761,7 +4793,8 @@ def save( ) ) array_of_union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_SaladMapSchemaLoader_or_SaladUnionSchemaLoader_or_DocumentationLoader = _ArrayLoader( - union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_SaladMapSchemaLoader_or_SaladUnionSchemaLoader_or_DocumentationLoader + union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_SaladMapSchemaLoader_or_SaladUnionSchemaLoader_or_DocumentationLoader, + True, ) 
union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_SaladMapSchemaLoader_or_SaladUnionSchemaLoader_or_DocumentationLoader_or_array_of_union_of_SaladRecordSchemaLoader_or_SaladEnumSchemaLoader_or_SaladMapSchemaLoader_or_SaladUnionSchemaLoader_or_DocumentationLoader = _UnionLoader( ( diff --git a/schema_salad/metaschema/metaschema_base.yml b/schema_salad/metaschema/metaschema_base.yml index 25607ce8..808d5ad4 100644 --- a/schema_salad/metaschema/metaschema_base.yml +++ b/schema_salad/metaschema/metaschema_base.yml @@ -189,6 +189,13 @@ $graph: _type: "@vocab" refScope: 2 doc: "Defines the type of the array elements." + flatten: + type: boolean? + jsonldPredicate: + _id: "sld:flatten" + _type: "@vocab" + refScope: 2 + doc: "Flatten inner array objects into a single sequence (default: true)." - name: MapSchema diff --git a/schema_salad/python_codegen.py b/schema_salad/python_codegen.py index f5f0e0b0..3ed3fa68 100644 --- a/schema_salad/python_codegen.py +++ b/schema_salad/python_codegen.py @@ -391,7 +391,12 @@ def type_loader(self, type_declaration: Union[List[Any], Dict[str, Any], str]) - "https://w3id.org/cwl/salad#array", ): i = self.type_loader(type_declaration["items"]) - return self.declare_type(TypeDef(f"array_of_{i.name}", f"_ArrayLoader({i.name})")) + return self.declare_type( + TypeDef( + f"array_of_{i.name}", + f"_ArrayLoader({i.name}, {type_declaration.get('flatten', True)})", + ) + ) if type_declaration["type"] in ( "map", "https://w3id.org/cwl/salad#map", diff --git a/schema_salad/python_codegen_support.py b/schema_salad/python_codegen_support.py index ec0d64b0..110c9ffb 100644 --- a/schema_salad/python_codegen_support.py +++ b/schema_salad/python_codegen_support.py @@ -379,9 +379,9 @@ def __repr__(self): # type: () -> str class _ArrayLoader(_Loader): - def __init__(self, items): - # type: (_Loader) -> None + def __init__(self, items: _Loader, flatten: bool = True) -> None: self.items = items + self.flatten = flatten def load(self, doc, baseuri, 
loadingOptions, docRoot=None): # type: (Any, str, LoadingOptions, Optional[str]) -> Any @@ -392,7 +392,7 @@ def load(self, doc, baseuri, loadingOptions, docRoot=None): for i in range(0, len(doc)): try: lf = load_field(doc[i], _UnionLoader((self, self.items)), baseuri, loadingOptions) - if isinstance(lf, MutableSequence): + if self.flatten and isinstance(lf, MutableSequence): r.extend(lf) else: r.append(lf) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index fd6bfe65..1ed56d43 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -138,6 +138,7 @@ def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]: "mapPredicate": "type", "mapSubject": "name", }, + "flatten": saladp + "flatten", "float": "http://www.w3.org/2001/XMLSchema#float", "identity": saladp + "JsonldPredicate/identity", "inVocab": saladp + "NamedType/inVocab", diff --git a/schema_salad/tests/cwl-pre.yml b/schema_salad/tests/cwl-pre.yml index f42ec413..7bfb5614 100644 --- a/schema_salad/tests/cwl-pre.yml +++ b/schema_salad/tests/cwl-pre.yml @@ -573,7 +573,6 @@ "name": "https://w3id.org/cwl/cwl#CWLObjectType", "type": "union", "names": [ - "null", "boolean", "int", "long", @@ -583,12 +582,19 @@ "https://w3id.org/cwl/cwl#File", "https://w3id.org/cwl/cwl#Directory", { - "items": "https://w3id.org/cwl/cwl#CWLObjectType", + "flatten": false, + "items": [ + "null", + "https://w3id.org/cwl/cwl#CWLObjectType" + ], "type": "array" }, { "type": "map", - "values": "https://w3id.org/cwl/cwl#CWLObjectType" + "values": [ + "null", + "https://w3id.org/cwl/cwl#CWLObjectType" + ] } ], "doc": "Generic type representing a valid CWL object. 
It is used to represent\n`default` values passed to CWL `InputParameter` and `WorkflowStepInput`\nrecord fields.\n" @@ -997,7 +1003,10 @@ }, { "name": "https://w3id.org/cwl/cwl#InputParameter/default", - "type": "https://w3id.org/cwl/cwl#CWLObjectType", + "type": [ + "null", + "https://w3id.org/cwl/cwl#CWLObjectType" + ], "jsonldPredicate": "cwl:default", "doc": "The default value for this parameter if not provided in the input\nobject.\n" }, @@ -2263,7 +2272,10 @@ }, { "name": "https://w3id.org/cwl/cwl#WorkflowStepInput/default", - "type": "https://w3id.org/cwl/cwl#CWLObjectType", + "type": [ + "null", + "https://w3id.org/cwl/cwl#CWLObjectType" + ], "doc": "The default value for this parameter if there is no `source`\nfield.\n", "jsonldPredicate": "cwl:default" }, @@ -2548,6 +2560,7 @@ "name": "https://w3id.org/cwl/cwl#CWLInputFile", "doc": "Type representing a valid CWL input file as a `map, CWLObjectType>>`.", "values": [ + "null", { "items": "https://w3id.org/cwl/cwl#ProcessRequirement", "type": "array" diff --git a/schema_salad/tests/metaschema-pre.yml b/schema_salad/tests/metaschema-pre.yml index de908055..3de2f8b6 100644 --- a/schema_salad/tests/metaschema-pre.yml +++ b/schema_salad/tests/metaschema-pre.yml @@ -257,6 +257,19 @@ "name": "https://w3id.org/cwl/salad#ArraySchema", "type": "record", "fields": [ + { + "type": [ + "null", + "boolean" + ], + "jsonldPredicate": { + "_id": "https://w3id.org/cwl/salad#flatten", + "_type": "@vocab", + "refScope": 2 + }, + "doc": "Flatten inner array objects into a single sequence (default: true).", + "name": "https://w3id.org/cwl/salad#ArraySchema/flatten" + }, { "type": [ "PrimitiveType", diff --git a/schema_salad/tests/test_makedoc.py b/schema_salad/tests/test_makedoc.py index 0cff3857..d38edb0b 100644 --- a/schema_salad/tests/test_makedoc.py +++ b/schema_salad/tests/test_makedoc.py @@ -239,5 +239,5 @@ def test_detect_changes_in_html(metaschema_doc: str, tmp_path: Path) -> None: with open(result, "w") as h: 
h.write(metaschema_doc) assert ( - hasher.hexdigest() == "108722da130cb85c6dc76e9925789f698e26cd42ab0056975b524449d8e469f7" + hasher.hexdigest() == "9f42a2951050100c81028d3e082899951ef914d942e6576c512c77530fe41fc4" ), result diff --git a/schema_salad/tests/test_schema/Base.yml b/schema_salad/tests/test_schema/Base.yml index 4574de4c..e3ee8706 100644 --- a/schema_salad/tests/test_schema/Base.yml +++ b/schema_salad/tests/test_schema/Base.yml @@ -275,7 +275,6 @@ $graph: - name: CWLObjectType type: union names: - - "null" - boolean - int - long @@ -285,9 +284,14 @@ $graph: - File - Directory - type: array - items: CWLObjectType + items: + - "null" + - CWLObjectType + flatten: false - type: map - values: CWLObjectType + values: + - "null" + - CWLObjectType doc: | Generic type representing a valid CWL object. It is used to represent `default` values passed to CWL `InputParameter` and `WorkflowStepInput` diff --git a/schema_salad/tests/test_schema/Process.yml b/schema_salad/tests/test_schema/Process.yml index fd8f4549..1ba24197 100644 --- a/schema_salad/tests/test_schema/Process.yml +++ b/schema_salad/tests/test_schema/Process.yml @@ -305,7 +305,7 @@ $graph: into a concrete form for execution, such as command line parameters. - name: default - type: CWLObjectType + type: CWLObjectType? jsonldPredicate: "cwl:default" doc: | The default value for this parameter if not provided in the input diff --git a/schema_salad/tests/test_schema/Workflow.yml b/schema_salad/tests/test_schema/Workflow.yml index 156c1a8a..9ef874ae 100644 --- a/schema_salad/tests/test_schema/Workflow.yml +++ b/schema_salad/tests/test_schema/Workflow.yml @@ -256,7 +256,7 @@ $graph: jsonldPredicate: "@id" doc: "A unique identifier for this workflow input parameter." - name: default - type: CWLObjectType + type: CWLObjectType? doc: | The default value for this parameter if there is no `source` field. 
@@ -585,8 +585,9 @@ $graph: - name: CWLInputFile type: map values: - - type: array - items: ProcessRequirement - - CWLObjectType + - "null" + - type: array + items: ProcessRequirement + - CWLObjectType doc: | Type representing a valid CWL input file as a `map, CWLObjectType>>`. \ No newline at end of file