From c8fa1b4a139a6b19a03b0284ba76f0f990fe3190 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Tue, 7 Oct 2025 11:10:25 +0100
Subject: [PATCH 01/11] Initial WIP for dataless merges -- cannot yet merge
 datafull+dataless.

---
 lib/iris/_merge.py | 66 ++++++++++++++++++++++++++++------------------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py
index aebbffdfee..1ff80827a9 100644
--- a/lib/iris/_merge.py
+++ b/lib/iris/_merge.py
@@ -320,6 +320,7 @@ class _CubeSignature(
             "data_type",
             "cell_measures_and_dims",
             "ancillary_variables_and_dims",
+            "is_dataless",
         ],
     )
 ):
@@ -430,6 +431,9 @@ def match(self, other, error_on_mismatch):
         if self.data_shape != other.data_shape:
             msg = "cube.shape differs: {} != {}"
             msgs.append(msg.format(self.data_shape, other.data_shape))
+        if self.is_dataless != other.is_dataless:
+            msg = "cube.is_dataless differs: {} != {}"
+            msgs.append(msg.format(self.is_dataless, other.is_dataless))
         if self.data_type != other.data_type:
             msg = "cube data dtype differs: {} != {}"
             msgs.append(msg.format(self.data_type, other.data_type))
@@ -1109,8 +1113,9 @@ def __init__(self, cube):
             source-cube.

         """
-        if cube.is_dataless():
-            raise iris.exceptions.DatalessError("merge")
+        # if cube.is_dataless():
+        #     raise iris.exceptions.DatalessError("merge")
+
         # Default hint ordering for candidate dimension coordinates.
         self._hints = [
             "time",
@@ -1234,33 +1239,42 @@ def merge(self, unique=True):

         # Generate group-depth merged cubes from the source-cubes.
         for level in range(group_depth):
-            # Stack up all the data from all of the relevant source
-            # cubes in a single dask "stacked" array.
-            # If it turns out that all the source cubes already had
-            # their data loaded then at the end we convert the stack back
-            # into a plain numpy array.
-            stack = np.empty(self._stack_shape, "object")
-            all_have_data = True
-            for nd_index in nd_indexes:
-                # Get the data of the current existing or last known
-                # good source-cube
-                group = group_by_nd_index[nd_index]
-                offset = min(level, len(group) - 1)
-                data = self._skeletons[group[offset]].data
-                # Ensure the data is represented as a dask array and
-                # slot that array into the stack.
-                if is_lazy_data(data):
-                    all_have_data = False
-                else:
-                    data = as_lazy_data(data)
-                stack[nd_index] = data
+            if self._cube_signature.is_dataless:
+                merged_shape = self._cube_signature.data_shape
+                # ?WRONG? merged_shape = self._stack_shape
+                # ?WRONG? merged_shape = (len(nd_indexes),) + shape
+                merged_data = None
+                all_have_data = False
+            else:
+                # Stack up all the data from all of the relevant source
+                # cubes in a single dask "stacked" array.
+                # If it turns out that all the source cubes already had
+                # their data loaded then at the end we convert the stack back
+                # into a plain numpy array.
+                stack = np.empty(self._stack_shape, "object")
+                all_have_data = True
+                for nd_index in nd_indexes:
+                    # Get the data of the current existing or last known
+                    # good source-cube
+                    group = group_by_nd_index[nd_index]
+                    offset = min(level, len(group) - 1)
+                    data = self._skeletons[group[offset]].data
+                    # Ensure the data is represented as a dask array and
+                    # slot that array into the stack.
+                    if is_lazy_data(data):
+                        all_have_data = False
+                    else:
+                        data = as_lazy_data(data)
+                    stack[nd_index] = data
+
+                merged_data = multidim_lazy_stack(stack)
+                merged_shape = None

-            merged_data = multidim_lazy_stack(stack)
             if all_have_data:
                 # All inputs were concrete, so turn the result back into a
                 # normal array.
                 merged_data = as_concrete_data(merged_data)
-            merged_cube = self._get_cube(merged_data)
+            merged_cube = self._get_cube(merged_data, shape=merged_shape)
             merged_cubes.append(merged_cube)

         return merged_cubes
@@ -1545,7 +1559,7 @@ def name_in_independents():
             # deferred loading, this does NOT change the shape.
             self._shape.extend(signature.data_shape)

-    def _get_cube(self, data):
+    def _get_cube(self, data, shape=None):
         """Generate fully constructed cube.

         Return a fully constructed cube for the given data, containing
@@ -1573,6 +1587,7 @@ def _get_cube(self, data):
             aux_coords_and_dims=aux_coords_and_dims,
             cell_measures_and_dims=cms_and_dims,
             ancillary_variables_and_dims=avs_and_dims,
+            shape=shape,
             **kwargs,
         )

@@ -1711,6 +1726,7 @@ def _build_signature(self, cube):
             cube.dtype,
             cube._cell_measures_and_dims,
             cube._ancillary_variables_and_dims,
+            cube.is_dataless(),
         )

     def _add_cube(self, cube, coord_payload):
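Note: patch 01 makes the merge machinery aware of datalessness but, as the subject
says, cannot yet combine dataless with dataful inputs. For orientation, a minimal
sketch of the behaviour the series is working towards (hypothetical usage, assuming
a build with the whole series applied):

    from iris.coords import AuxCoord
    from iris.cube import Cube, CubeList

    # Two dataless cubes, identical apart from their scalar "z" coordinate.
    # Cube(shape=...) creates a cube with a shape but no data payload.
    cubes = CubeList(
        [
            Cube(
                shape=(3,),
                long_name="phenom",
                aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
            )
            for z in (1, 2)
        ]
    )
    merged = cubes.merge_cube()
    print(merged.shape, merged.is_dataless())  # (2, 3) True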
From c4165a977a86434d139cb25206ec793c14eec5ab Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Tue, 7 Oct 2025 14:40:33 +0100
Subject: [PATCH 02/11] Starting tests.

---
 .../tests/integration/merge/test_dataless.py | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 lib/iris/tests/integration/merge/test_dataless.py

diff --git a/lib/iris/tests/integration/merge/test_dataless.py b/lib/iris/tests/integration/merge/test_dataless.py
new file mode 100644
index 0000000000..3fd3d04550
--- /dev/null
+++ b/lib/iris/tests/integration/merge/test_dataless.py
@@ -0,0 +1,64 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Integration tests for merging with dataless cubes."""
+
+import numpy as np
+
+from iris.coords import AuxCoord, DimCoord
+from iris.cube import Cube, CubeList
+
+
+class TestMergeDataless:
+    def _testcube(self, z=1, name="this", dataless=False):
+        # Create a testcube with a scalar Z coord, for merge testing.
+        cube = Cube(
+            [1, 2, 3],
+            dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)],
+            aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
+        )
+        if dataless:
+            cube.data = None
+        return cube
+
+    def test_general_nomerge(self):
+        # Check that normal merge works OK with dataless cubes included
+        cubes = CubeList(
+            [
+                self._testcube(name="this", dataless=False),
+                self._testcube(name="that", dataless=True),
+            ]
+        )
+        result = cubes.merge()
+        assert len(result) == 2
+        cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")]
+        assert not cube1.is_dataless()
+        assert cube1.is_dataless()
+
+    def test_dataless_merge(self):
+        # Check that dataless cubes can be merged correctly.
+        cubes = CubeList(
+            [
+                self._testcube(z=1, dataless=True),
+                self._testcube(z=2, dataless=True),
+            ]
+        )
+        cube = cubes.merge_cube()
+        assert cube.is_dataless()
+        assert np.all(cube.coord("z").points == [1, 2])
+
+    def test_dataless_dataful_merge(self):
+        # Check that dataless cubes can merge **with** regular ones.
+        # Check that dataless cubes can be merged correctly.
+        cubes = CubeList(
+            [
+                self._testcube(z=1, dataless=False),
+                self._testcube(z=2, dataless=True),
+            ]
+        )
+        cube = cubes.merge_cube()
+        assert not cube.is_dataless()
+        data_z1, data_z2 = cube[0].data, cube[1].data
+        assert np.all(data_z1 == [1, 2, 3])
+        assert np.all(np.ma.getmaskarray(data_z2) == True)  # noqa: E712
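Note: the `_testcube` helper above creates dataless cubes by assigning
`cube.data = None`, which discards the payload but keeps shape and metadata.
In isolation (a sketch, not part of the patch):

    import numpy as np
    from iris.cube import Cube

    cube = Cube(np.arange(3.0), long_name="phenom")
    cube.data = None           # discard the data payload
    assert cube.is_dataless()  # True
    assert cube.shape == (3,)  # the shape is retained
    assert cube.dtype is None  # but there is no dtype without data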
From 4715ad50c764081db29a4758a6443b6e63f804ee Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Wed, 8 Oct 2025 12:01:16 +0100
Subject: [PATCH 03/11] Functioning backstop: merge can pass-through dataless,
 but not actually merge them.

---
 lib/iris/_merge.py                                |  4 ++--
 lib/iris/tests/integration/merge/test_dataless.py | 13 ++++++++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py
index 1ff80827a9..856261465b 100644
--- a/lib/iris/_merge.py
+++ b/lib/iris/_merge.py
@@ -1305,8 +1305,8 @@ def register(self, cube, error_on_mismatch=False):
             this :class:`ProtoCube`.

         """
-        if cube.is_dataless():
-            raise iris.exceptions.DatalessError("merge")
+        # if cube.is_dataless():
+        #     raise iris.exceptions.DatalessError("merge")
         cube_signature = self._cube_signature
         other = self._build_signature(cube)
         match = cube_signature.match(other, error_on_mismatch)
diff --git a/lib/iris/tests/integration/merge/test_dataless.py b/lib/iris/tests/integration/merge/test_dataless.py
index 3fd3d04550..00deaeeab1 100644
--- a/lib/iris/tests/integration/merge/test_dataless.py
+++ b/lib/iris/tests/integration/merge/test_dataless.py
@@ -15,6 +15,7 @@ def _testcube(self, z=1, name="this", dataless=False):
         # Create a testcube with a scalar Z coord, for merge testing.
         cube = Cube(
             [1, 2, 3],
+            long_name=name,
             dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)],
             aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
         )
@@ -22,8 +23,8 @@ def _testcube(self, z=1, name="this", dataless=False):
             cube.data = None
         return cube

-    def test_general_nomerge(self):
-        # Check that normal merge works OK with dataless cubes included
+    def test_mixed_passthrough(self):
+        # Check that normal merge can handle dataless alongside dataful cubes.
         cubes = CubeList(
             [
                 self._testcube(name="this", dataless=False),
@@ -34,17 +35,19 @@ def test_general_nomerge(self):
         result = cubes.merge()
         assert len(result) == 2
         cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")]
         assert not cube1.is_dataless()
-        assert cube1.is_dataless()
+        assert cube2.is_dataless()

     def test_dataless_merge(self):
-        # Check that dataless cubes can be merged correctly.
+        # Check that dataless cubes can be merged.
         cubes = CubeList(
             [
                 self._testcube(z=1, dataless=True),
                 self._testcube(z=2, dataless=True),
             ]
         )
-        cube = cubes.merge_cube()
+        cubes = cubes.merge()
+        assert len(cubes) == 2
+        (cube, cube2) = cubes
         assert cube.is_dataless()
         assert np.all(cube.coord("z").points == [1, 2])
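Note: after patch 03 the merge machinery tolerates dataless inputs rather than
raising DatalessError, but it does not yet combine them: as the revised test above
shows, two otherwise-mergeable dataless cubes simply pass through. A standalone
sketch of that interim behaviour:

    import numpy as np
    from iris.coords import AuxCoord
    from iris.cube import Cube, CubeList

    def dataless_z_cube(z):
        cube = Cube(
            np.arange(3.0),
            long_name="phenom",
            aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
        )
        cube.data = None
        return cube

    result = CubeList([dataless_z_cube(1), dataless_z_cube(2)]).merge()
    print(len(result))  # 2 : accepted without error, but not combined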
From 222acb55903075e9725cf7adcdd3655ab2063af3 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Wed, 8 Oct 2025 12:30:03 +0100
Subject: [PATCH 04/11] Dataless merge, combine dataless with/without dataful.

---
 lib/iris/_merge.py                            | 108 +++++++++++------
 .../tests/integration/merge/test_dataless.py |   4 +-
 2 files changed, 70 insertions(+), 42 deletions(-)

diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py
index 856261465b..8833719e50 100644
--- a/lib/iris/_merge.py
+++ b/lib/iris/_merge.py
@@ -12,6 +12,7 @@
 from collections import OrderedDict, namedtuple
 from copy import deepcopy

+import dask.array as da
 import numpy as np

 from iris._lazy_data import (
@@ -320,7 +321,6 @@ class _CubeSignature(
             "data_type",
             "cell_measures_and_dims",
             "ancillary_variables_and_dims",
-            "is_dataless",
         ],
     )
 ):
@@ -431,10 +431,13 @@ def match(self, other, error_on_mismatch):
         if self.data_shape != other.data_shape:
             msg = "cube.shape differs: {} != {}"
             msgs.append(msg.format(self.data_shape, other.data_shape))
-        if self.is_dataless != other.is_dataless:
-            msg = "cube.is_dataless differs: {} != {}"
-            msgs.append(msg.format(self.is_dataless, other.is_dataless))
-        if self.data_type != other.data_type:
+        if (
+            self.data_type is not None
+            and other.data_type is not None
+            and self.data_type != other.data_type
+        ):
+            # N.B. allow "None" to match any other dtype: this means that dataless
+            # cubes can merge with 'dataful' ones.
             msg = "cube data dtype differs: {} != {}"
             msgs.append(msg.format(self.data_type, other.data_type))
         # Both cell_measures_and_dims and ancillary_variables_and_dims are
@@ -1113,9 +1116,6 @@ def __init__(self, cube):
             source-cube.

         """
-        # if cube.is_dataless():
-        #     raise iris.exceptions.DatalessError("merge")
-
         # Default hint ordering for candidate dimension coordinates.
         self._hints = [
             "time",
@@ -1239,41 +1239,68 @@ def merge(self, unique=True):

         # Generate group-depth merged cubes from the source-cubes.
         for level in range(group_depth):
-            if self._cube_signature.is_dataless:
-                merged_shape = self._cube_signature.data_shape
-                # ?WRONG? merged_shape = self._stack_shape
-                # ?WRONG? merged_shape = (len(nd_indexes),) + shape
-                merged_data = None
-                all_have_data = False
-            else:
-                # Stack up all the data from all of the relevant source
-                # cubes in a single dask "stacked" array.
-                # If it turns out that all the source cubes already had
-                # their data loaded then at the end we convert the stack back
-                # into a plain numpy array.
-                stack = np.empty(self._stack_shape, "object")
-                all_have_data = True
-                for nd_index in nd_indexes:
-                    # Get the data of the current existing or last known
-                    # good source-cube
-                    group = group_by_nd_index[nd_index]
-                    offset = min(level, len(group) - 1)
-                    data = self._skeletons[group[offset]].data
-                    # Ensure the data is represented as a dask array and
-                    # slot that array into the stack.
-                    if is_lazy_data(data):
-                        all_have_data = False
-                    else:
-                        data = as_lazy_data(data)
-                    stack[nd_index] = data
-
-                merged_data = multidim_lazy_stack(stack)
-                merged_shape = None
-
-            if all_have_data:
-                # All inputs were concrete, so turn the result back into a
-                # normal array.
-                merged_data = as_concrete_data(merged_data)
+            # Stack up all the data from all of the relevant source
+            # cubes in a single dask "stacked" array.
+            # If it turns out that all the source cubes already had
+            # their data loaded then at the end we convert the stack back
+            # into a plain numpy array.
+            stack = np.empty(self._stack_shape, "object")
+            all_have_real_data = True
+            some_are_dataless = False
+            part_shape: tuple = None
+            part_dtype: np.dtype = None
+            for nd_index in nd_indexes:
+                # Get the data of the current existing or last known
+                # good source-cube
+                group = group_by_nd_index[nd_index]
+                offset = min(level, len(group) - 1)
+                data = self._skeletons[group[offset]].data
+                # Ensure the data is represented as a dask array and
+                # slot that array into the stack.
+                if data is None:
+                    some_are_dataless = True
+                else:
+                    # We have (at least one) array content : Record the shape+dtype
+                    if part_shape is None:
+                        part_shape = data.shape
+                        part_dtype = data.dtype
+                    else:
+                        # We expect that the "parts" should **all be the same**
+                        assert data.shape == part_shape
+                        assert data.dtype == part_dtype
+
+                    # ensure lazy (we make the result real, later, if all were real)
+                    if is_lazy_data(data):
+                        all_have_real_data = False
+                    else:
+                        data = as_lazy_data(data)
+                stack[nd_index] = data
+
+            if part_shape is None:
+                # NO parts had data : the result will also be dataless
+                merged_data = None
+                merged_shape = self._shape
+            else:
+                # At least some inputs had data : the result will have a data array.
+                if some_are_dataless:
+                    # Some parts were dataless: fill these with a lazy all-missing array.
+                    missing_part = da.ma.masked_array(
+                        data=da.zeros(part_shape, dtype=np.dtype("u1")),
+                        mask=da.ones(part_shape, dtype=bool),
+                        dtype=part_dtype,
+                    )
+                    for inds in np.ndindex(stack.shape):
+                        if stack[inds] is None:
+                            stack[inds] = missing_part
+
+                # Make a single lazy merged result array
+                merged_data = multidim_lazy_stack(stack)
+                merged_shape = None
+                if all_have_real_data:
+                    # All inputs were concrete, so turn the result back into a
+                    # normal array.
+                    merged_data = as_concrete_data(merged_data)

             merged_cube = self._get_cube(merged_data, shape=merged_shape)
             merged_cubes.append(merged_cube)
@@ -1305,8 +1332,6 @@ def register(self, cube, error_on_mismatch=False):
             this :class:`ProtoCube`.

         """
-        # if cube.is_dataless():
-        #     raise iris.exceptions.DatalessError("merge")
         cube_signature = self._cube_signature
         other = self._build_signature(cube)
         match = cube_signature.match(other, error_on_mismatch)
@@ -1565,6 +1590,12 @@ def _get_cube(self, data, shape=None):
         Return a fully constructed cube for the given data, containing
         all its coordinates and metadata.

+        Parameters
+        ----------
+        data : array_like
+            Cube data content. If None, `shape` must be set and the result is dataless.
+        shape : tuple, optional
+            Cube data shape, only used if data is None.
         """
         signature = self._cube_signature
         dim_coords_and_dims = [
diff --git a/lib/iris/tests/integration/merge/test_dataless.py b/lib/iris/tests/integration/merge/test_dataless.py
index 00deaeeab1..eeeabe9f39 100644
--- a/lib/iris/tests/integration/merge/test_dataless.py
+++ b/lib/iris/tests/integration/merge/test_dataless.py
@@ -45,9 +45,7 @@ def test_dataless_merge(self):
                 self._testcube(z=2, dataless=True),
             ]
         )
-        cubes = cubes.merge()
-        assert len(cubes) == 2
-        (cube, cube2) = cubes
+        cube = cubes.merge_cube()
         assert cube.is_dataless()
         assert np.all(cube.coord("z").points == [1, 2])
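Note: the key device in patch 04 is the all-masked lazy stand-in used for each
missing section when dataless and dataful inputs are combined. The fill pattern in
isolation (a standalone sketch, with example shape and dtype):

    import dask.array as da
    import numpy as np

    part_shape, part_dtype = (3,), np.dtype("f8")
    missing_part = da.ma.masked_array(
        data=da.zeros(part_shape, dtype=np.dtype("u1")),  # cheap placeholder values
        mask=da.ones(part_shape, dtype=bool),             # everything masked
        dtype=part_dtype,
    )
    print(missing_part.compute())
    # masked_array(data=[--, --, --], mask=[ True,  True,  True], ...)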
From b1be959fd0f593f141406304647645b53699001c Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Wed, 8 Oct 2025 14:56:51 +0100
Subject: [PATCH 05/11] Tidy awkward layout in test.

---
 .../tests/integration/merge/test_merge.py | 112 ++++--------------
 1 file changed, 25 insertions(+), 87 deletions(-)

diff --git a/lib/iris/tests/integration/merge/test_merge.py b/lib/iris/tests/integration/merge/test_merge.py
index c47231d57d..bb6a652320 100644
--- a/lib/iris/tests/integration/merge/test_merge.py
+++ b/lib/iris/tests/integration/merge/test_merge.py
@@ -214,93 +214,31 @@ def _make_cube(self, a, b, c, d, data=0):

     def test_separable_combination(self, request):
         cubes = iris.cube.CubeList()
-        cubes.append(
-            self._make_cube("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 0)
-        )
-        cubes.append(
-            self._make_cube("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 1)
-        )
-        cubes.append(
-            self._make_cube("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 2)
-        )
-        cubes.append(
-            self._make_cube(
-                "2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 1
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 2
-            )
-        )
-        cubes.append(
-            self._make_cube("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 0)
-        )
-        cubes.append(
-            self._make_cube("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 1)
-        )
-        cubes.append(
-            self._make_cube("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 2)
-        )
-        cubes.append(
-            self._make_cube("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 0)
-        )
-        cubes.append(
-            self._make_cube("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 1)
-        )
-        cubes.append(
-            self._make_cube("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 2)
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 10, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 11, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 12, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 13, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 14, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 15, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 16, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 17, ENSEMBLES", 0
-            )
-        )
-        cubes.append(
-            self._make_cube(
-                "2502", "UK Met Office", "HadCM3, Sys 51, Met 18, ENSEMBLES", 0
-            )
-        )
+
+        def add(*args):
+            cubes.append(self._make_cube(*args))
+
+        add("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 0)
+        add("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 1)
+        add("2005", "ECMWF", "HOPE-E, Sys 1, Met 1, ENSEMBLES", 2)
+        add("2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 0)
+        add("2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 1)
+        add("2026", "UK Met Office", "HadGEM2, Sys 1, Met 1, ENSEMBLES", 2)
+        add("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 0)
+        add("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 1)
+        add("2002", "CERFACS", "GELATO, Sys 0, Met 1, ENSEMBLES", 2)
+        add("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 0)
+        add("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 1)
+        add("2002", "IFM-GEOMAR", "ECHAM5, Sys 1, Met 10, ENSEMBLES", 2)
+        add("2502", "UK Met Office", "HadCM3, Sys 51, Met 10, ENSEMBLES", 0)
"HadCM3, Sys 51, Met 10, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 11, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 12, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 13, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 14, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 15, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 16, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 17, ENSEMBLES", 0) + add("2502", "UK Met Office", "HadCM3, Sys 51, Met 18, ENSEMBLES", 0) cube = cubes.merge() assert_CML( request, cube, ("merge", "separable_combination.cml"), checksum=False From 37fc7f5003947ab539dc01d01fd8c0e50a84a23b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 8 Oct 2025 17:18:37 +0100 Subject: [PATCH 06/11] Ensure that cube.shape can only be a tuple (or None). --- lib/iris/_data_manager.py | 16 +++-- .../merge/test_merge_with_dataless.py | 65 +++++++++++++++++++ 2 files changed, 75 insertions(+), 6 deletions(-) create mode 100644 lib/iris/tests/integration/merge/test_merge_with_dataless.py diff --git a/lib/iris/_data_manager.py b/lib/iris/_data_manager.py index 00de2b5ae6..6e3c3ce1e3 100644 --- a/lib/iris/_data_manager.py +++ b/lib/iris/_data_manager.py @@ -34,12 +34,16 @@ def __init__(self, data, shape=None): dataless. """ - if (shape is None) and (data is None): - msg = 'one of "shape" or "data" should be provided; both are None' - raise ValueError(msg) - elif (shape is not None) and (data is not None): - msg = '"shape" should only be provided if "data" is None' - raise ValueError(msg) + if shape is None: + if data is None: + msg = 'one of "shape" or "data" should be provided; both are None' + raise ValueError(msg) + else: + if data is not None: + msg = '"shape" should only be provided if "data" is None' + raise ValueError(msg) + # Normalise how shape is recorded + shape = tuple(shape) # Initialise the instance. self._shape = shape diff --git a/lib/iris/tests/integration/merge/test_merge_with_dataless.py b/lib/iris/tests/integration/merge/test_merge_with_dataless.py new file mode 100644 index 0000000000..f7ff89974a --- /dev/null +++ b/lib/iris/tests/integration/merge/test_merge_with_dataless.py @@ -0,0 +1,65 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Basic integration tests for merging with dataless cubes.""" + +import numpy as np + +from iris.coords import AuxCoord, DimCoord +from iris.cube import Cube, CubeList + + +class TestMergeDataless: + def _testcube(self, z=1, name="this", dataless=False): + # Create a testcube with a scalar Z coord, for merge testing. + cube = Cube( + [1, 2, 3], + long_name=name, + dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)], + aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())], + ) + if dataless: + cube.data = None + return cube + + def test_mixed_passthrough(self): + # Check that normal merge can handle dataless alongside dataful cubes. + cubes = CubeList( + [ + self._testcube(name="this", dataless=False), + self._testcube(name="that", dataless=True), + ] + ) + result = cubes.merge() + assert len(result) == 2 + cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")] + assert not cube1.is_dataless() + assert cube2.is_dataless() + + def test_dataless_merge(self): + # Check that dataless cubes can be merged. 
From 6b0087c5e112cd543506f50cefebd228f8dd5287 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Wed, 8 Oct 2025 17:38:47 +0100
Subject: [PATCH 07/11] Make test_merge check against dataless input in all
 its tests.

---
 .../tests/integration/merge/test_merge.py    | 129 ++++++++++++++++--
 .../merge/test_merge_with_dataless.py        |  65 ---------
 2 files changed, 119 insertions(+), 75 deletions(-)
 delete mode 100644 lib/iris/tests/integration/merge/test_merge_with_dataless.py

diff --git a/lib/iris/tests/integration/merge/test_merge.py b/lib/iris/tests/integration/merge/test_merge.py
index bb6a652320..39abc721af 100644
--- a/lib/iris/tests/integration/merge/test_merge.py
+++ b/lib/iris/tests/integration/merge/test_merge.py
@@ -10,6 +10,7 @@
 import pytest

 from iris.coords import AuxCoord, DimCoord
+import iris.cube
 from iris.cube import Cube, CubeList
 from iris.tests._shared_utils import (
     assert_array_equal,
@@ -19,6 +20,114 @@
 )
 import iris.tests.stock

+_ORIGINAL_MERGE = iris.cube.CubeList.merge
+_ORIGINAL_MERGE_CUBE = iris.cube.CubeList.merge_cube
+
+# Testing options for checking that merge works ~same when some inputs are dataless
+_DATALESS_TEST_OPTIONS = [
+    "dataless_none",
+    "dataless_one",
+    "dataless_all",
+    "dataless_allbut1",
+]
+
+
+@pytest.fixture(params=_DATALESS_TEST_OPTIONS)
+def dataless_option(request):
+    return request.param
+
+
+def mangle_cubelist(cubelist, dataless_option):
+    """Return a modified cubelist, where some cubes are dataless.
+
+    'dataless_option' controls whether 0, 1, N or N-1 cubes are made dataless.
+    """
+    assert isinstance(cubelist, CubeList)
+    n_cubes = len(cubelist)
+    result = CubeList([])
+    ind_one = len(cubelist) // 3
+    for i_cube, cube in enumerate(cubelist):
+        if (
+            (dataless_option == "dataless_one" and i_cube == ind_one)
+            or (dataless_option == "dataless_allbut1" and i_cube != ind_one)
+            or dataless_option == "dataless_all"
+        ):
+            # Make this one dataless
+            cube = cube.copy()
+            cube.data = None
+
+        result.append(cube)
+
+    # Do a quick post-test
+    assert len(result) == len(cubelist)
+    count = sum([cube.is_dataless() for cube in result])
+    expected = {
+        "dataless_none": 0,
+        "dataless_one": 1,
+        "dataless_all": n_cubes,
+        "dataless_allbut1": n_cubes - 1,
+    }[dataless_option]
+    assert count == expected
+
+    return result
+
+
+def check_merge_against_dataless_cases(
+    function, original_input, *args, dataless_option=None
+):
+    # Compute the "normal" result.
+    original_result = function(original_input, *args)
+
+    if dataless_option != "dataless_none":
+        # Re-run with "mangled" inputs, and compare the result with the normal case.
+        mangled_input = mangle_cubelist(original_input, dataless_option)
+        mangled_result = function(mangled_input, *args)
+
+        # Normalise to get a list of cubes
+        if isinstance(original_result, Cube):  # i.e. a single Cube, not a CubeList
+            result_cubes = [original_result]
+            mangled_cubes = [mangled_result]
+        else:
+            result_cubes = original_result
+            mangled_cubes = mangled_result
+
+        # If **all** input is dataless, all output should be dataless too
+        if dataless_option == "dataless_all":
+            assert all([cube.is_dataless() for cube in mangled_cubes])
+
+        # We should get all the same cubes, **except** for the data content
+        assert len(mangled_cubes) == len(result_cubes)
+        for cube1, cube2 in zip(mangled_cubes, result_cubes):
+            cube1, cube2 = [cube.copy() for cube in (cube1, cube2)]
+            for cube in (cube1, cube2):
+                cube.data = None
+            if cube1 != cube2:
+                assert cube1 == cube2
+
+    return original_result
+
+
+class DatalessMixin:
+    # Mixin class to make every merge check for operation with dataless cubes
+    @pytest.fixture(autouse=True)
+    def setup_patch(self, mocker, dataless_option):
+        # NB these patch functions must be generated dynamically (for each test
+        # parametrisation), so that they can access the 'dataless_option' switch.
+        def patched_merge(cubelist, unique=True):
+            return check_merge_against_dataless_cases(
+                _ORIGINAL_MERGE, cubelist, unique, dataless_option=dataless_option
+            )
+
+        def patched_merge_cube(cubelist):
+            return check_merge_against_dataless_cases(
+                _ORIGINAL_MERGE_CUBE, cubelist, dataless_option=dataless_option
+            )
+
+        # Patch **all** uses of CubeList.merge/merge_cube within these tests, to compare
+        # "normal" results with those which have some dataless inputs.
+        mocker.patch("iris.cube.CubeList.merge", patched_merge)
+        mocker.patch("iris.cube.CubeList.merge_cube", patched_merge_cube)
+

 class MergeMixin:
     """Mix-in class for attributes & utilities common to these test cases."""
@@ -45,7 +154,7 @@ def test_duplication(self):


 @skip_data
-class TestSingleCube(MergeMixin):
+class TestSingleCube(MergeMixin, DatalessMixin):
     def setup_method(self):
         self._data_path = get_data_path(("PP", "globClim1", "theta.pp"))
         self._num_cubes = 1
@@ -53,7 +162,7 @@ def setup_method(self):


 @skip_data
-class TestMultiCube(MergeMixin):
+class TestMultiCube(MergeMixin, DatalessMixin):
     def setup_method(self):
         self._data_path = get_data_path(("PP", "globClim1", "dec_subset.pp"))
         self._num_cubes = 4
@@ -75,7 +184,7 @@ def custom_coord_callback(cube, field, filename):


 @skip_data
-class TestColpex:
+class TestColpex(DatalessMixin):
     def setup_method(self):
         self._data_path = get_data_path(("PP", "COLPEX", "small_colpex_theta_p_alt.pp"))

@@ -86,7 +195,7 @@ def test_colpex(self, request):


 @skip_data
-class TestDataMerge:
+class TestDataMerge(DatalessMixin):
     def test_extended_proxy_data(self, request):
         # Get the empty theta cubes for T+1.5 and T+2
         data_path = get_data_path(("PP", "COLPEX", "theta_and_orog_subset.pp"))
@@ -119,7 +228,7 @@ def test_real_data(self, request):
         assert_CML(request, cubes, ["merge", "theta.cml"])


-class TestDimensionSplitting:
+class TestDimensionSplitting(DatalessMixin):
     def _make_cube(self, a, b, c, data):
         cube_data = np.empty((4, 5), dtype=np.float32)
         cube_data[:] = data
@@ -182,7 +291,7 @@ def test_multi_split(self, request):
         assert_CML(request, cube, ("merge", "multi_split.cml"))


-class TestCombination:
+class TestCombination(DatalessMixin):
     def _make_cube(self, a, b, c, d, data=0):
         cube_data = np.empty((4, 5), dtype=np.float32)
         cube_data[:] = data
@@ -245,7 +354,7 @@ def add(*args):
     )


-class TestDimSelection:
+class TestDimSelection(DatalessMixin):
     def _make_cube(self, a, b, data=0, a_dim=False, b_dim=False):
         cube_data = np.empty((4, 5), dtype=np.float32)
         cube_data[:] = data
@@ -360,7
 +469,7 @@ def test_a_dim_b_dim(self, request):
         assert cube.coord("b") in cube.aux_coords


-class TestTimeTripleMerging:
+class TestTimeTripleMerging(DatalessMixin):
     def _make_cube(self, a, b, c, data=0):
         cube_data = np.empty((4, 5), dtype=np.float32)
         cube_data[:] = data
@@ -583,7 +692,7 @@ def test_simple3(self, request):
         assert_CML(request, cube, ("merge", "time_triple_merging5.cml"), checksum=False)


-class TestCubeMergeTheoretical:
+class TestCubeMergeTheoretical(DatalessMixin):
     def test_simple_bounds_merge(self, request):
         cube1 = iris.tests.stock.simple_2d()
         cube2 = iris.tests.stock.simple_2d()
@@ -649,7 +758,7 @@ def test_simple_points_merge(self, request):
         assert_CML(request, r, ("cube_merge", "test_simple_attributes3.cml"))


-class TestContiguous:
+class TestContiguous(DatalessMixin):
     def test_form_contiguous_dimcoord(self):
         # Test that cube sliced up and remerged in the opposite order maintains
         # contiguity.
diff --git a/lib/iris/tests/integration/merge/test_merge_with_dataless.py b/lib/iris/tests/integration/merge/test_merge_with_dataless.py
deleted file mode 100644
index f7ff89974a..0000000000
--- a/lib/iris/tests/integration/merge/test_merge_with_dataless.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright Iris contributors
-#
-# This file is part of Iris and is released under the BSD license.
-# See LICENSE in the root of the repository for full licensing details.
-"""Basic integration tests for merging with dataless cubes."""
-
-import numpy as np
-
-from iris.coords import AuxCoord, DimCoord
-from iris.cube import Cube, CubeList
-
-
-class TestMergeDataless:
-    def _testcube(self, z=1, name="this", dataless=False):
-        # Create a testcube with a scalar Z coord, for merge testing.
-        cube = Cube(
-            [1, 2, 3],
-            long_name=name,
-            dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)],
-            aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
-        )
-        if dataless:
-            cube.data = None
-        return cube
-
-    def test_mixed_passthrough(self):
-        # Check that normal merge can handle dataless alongside dataful cubes.
-        cubes = CubeList(
-            [
-                self._testcube(name="this", dataless=False),
-                self._testcube(name="that", dataless=True),
-            ]
-        )
-        result = cubes.merge()
-        assert len(result) == 2
-        cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")]
-        assert not cube1.is_dataless()
-        assert cube2.is_dataless()
-
-    def test_dataless_merge(self):
-        # Check that dataless cubes can be merged.
-        cubes = CubeList(
-            [
-                self._testcube(z=1, dataless=True),
-                self._testcube(z=2, dataless=True),
-            ]
-        )
-        cube = cubes.merge_cube()
-        assert cube.is_dataless()
-        assert np.all(cube.coord("z").points == [1, 2])
-
-    def test_dataless_dataful_merge(self):
-        # Check that dataless cubes can merge **with** regular ones.
-        # Check that dataless cubes can be merged correctly.
-        cubes = CubeList(
-            [
-                self._testcube(z=1, dataless=False),
-                self._testcube(z=2, dataless=True),
-            ]
-        )
-        cube = cubes.merge_cube()
-        assert not cube.is_dataless()
-        data_z1, data_z2 = cube[0].data, cube[1].data
-        assert np.all(data_z1 == [1, 2, 3])
-        assert np.all(np.ma.getmaskarray(data_z2) == True)  # noqa: E712
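Note: the invariant that DatalessMixin enforces across the whole test module,
reduced to a standalone sketch: making some merge inputs dataless must change only
the data payload of the results, never their structure or metadata.

    import numpy as np
    from iris.coords import AuxCoord
    from iris.cube import Cube, CubeList

    def z_cube(z, dataless=False):
        cube = Cube(
            np.arange(3.0),
            long_name="phenom",
            aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
        )
        if dataless:
            cube.data = None
        return cube

    normal = CubeList([z_cube(1), z_cube(2)]).merge_cube()
    mangled = CubeList([z_cube(1), z_cube(2, dataless=True)]).merge_cube()
    for cube in (normal, mangled):
        cube.data = None  # compare everything except the data
    assert normal == mangled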
From 46ccc675b43f706053a040f0b643a7a4e555bbee Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Fri, 10 Oct 2025 14:14:25 +0100
Subject: [PATCH 08/11] Improve tests, and test for lazy merge result.

---
 .../tests/integration/merge/test_dataless.py | 44 +++++++++----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/lib/iris/tests/integration/merge/test_dataless.py b/lib/iris/tests/integration/merge/test_dataless.py
index eeeabe9f39..640dc71456 100644
--- a/lib/iris/tests/integration/merge/test_dataless.py
+++ b/lib/iris/tests/integration/merge/test_dataless.py
@@ -4,6 +4,7 @@
 # See LICENSE in the root of the repository for full licensing details.
 """Integration tests for merging with dataless cubes."""

+import dask.array as da
 import numpy as np

 from iris.coords import AuxCoord, DimCoord
@@ -11,10 +12,11 @@


 class TestMergeDataless:
-    def _testcube(self, z=1, name="this", dataless=False):
+    def _testcube(self, z=1, name="this", dataless=False, lazy=False):
         # Create a testcube with a scalar Z coord, for merge testing.
+        data = da.arange(3) if lazy else np.arange(3)
         cube = Cube(
-            [1, 2, 3],
+            data,
             long_name=name,
             dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)],
             aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
@@ -25,13 +27,12 @@ def _testcube(self, z=1, name="this", dataless=False):

     def test_mixed_passthrough(self):
         # Check that normal merge can handle dataless alongside dataful cubes.
-        cubes = CubeList(
-            [
-                self._testcube(name="this", dataless=False),
-                self._testcube(name="that", dataless=True),
-            ]
-        )
+        cube_normal = self._testcube(name="this", dataless=False)
+        cube_dataless = self._testcube(name="that", dataless=True)
+        cubes = CubeList([cube_normal, cube_dataless])
+
         result = cubes.merge()
+
         assert len(result) == 2
         cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")]
         assert not cube1.is_dataless()
@@ -39,27 +40,26 @@ def test_mixed_passthrough(self):

     def test_dataless_merge(self):
         # Check that dataless cubes can be merged.
-        cubes = CubeList(
-            [
-                self._testcube(z=1, dataless=True),
-                self._testcube(z=2, dataless=True),
-            ]
-        )
+        cube_1 = self._testcube(z=1, dataless=True)
+        cube_2 = self._testcube(z=2, dataless=True)
+        cubes = CubeList([cube_1, cube_2])
+
         cube = cubes.merge_cube()
+
         assert cube.is_dataless()
         assert np.all(cube.coord("z").points == [1, 2])

     def test_dataless_dataful_merge(self):
         # Check that dataless cubes can merge **with** regular ones.
-        # Check that dataless cubes can be merged correctly.
-        cubes = CubeList(
-            [
-                self._testcube(z=1, dataless=False),
-                self._testcube(z=2, dataless=True),
-            ]
-        )
+        # Include checking that laziness is preserved.
+        cube_normal = self._testcube(z=1, dataless=False, lazy=True)
+        cube_dataless = self._testcube(z=2, dataless=True)
+        cubes = CubeList([cube_normal, cube_dataless])
+
         cube = cubes.merge_cube()
+
         assert not cube.is_dataless()
+        assert cube.has_lazy_data()
         data_z1, data_z2 = cube[0].data, cube[1].data
-        assert np.all(data_z1 == [1, 2, 3])
+        assert np.all(data_z1 == [0, 1, 2])
         assert np.all(np.ma.getmaskarray(data_z2) == True)  # noqa: E712
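Note: the has_lazy_data check added above matters because the masked fill for
dataless sections is itself built lazily, so merging a lazy cube with a dataless
one should not force a compute. A standalone sketch:

    import dask.array as da
    import numpy as np
    from iris.coords import AuxCoord, DimCoord
    from iris.cube import Cube, CubeList

    lazy_cube = Cube(
        da.arange(3.0),
        long_name="phenom",
        dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2.0], long_name="x"), 0)],
        aux_coords_and_dims=[(AuxCoord([1], long_name="z"), ())],
    )
    dataless_cube = lazy_cube.copy()
    dataless_cube.data = None
    dataless_cube.coord("z").points = [2]

    merged = CubeList([lazy_cube, dataless_cube]).merge_cube()
    print(merged.has_lazy_data())           # True : still lazy after merging
    print(np.ma.is_masked(merged.data[1]))  # True : the dataless slice is masked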
From 1c4f14c12aa2865d35d46ed98b457cc7220e4245 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Fri, 10 Oct 2025 14:14:48 +0100
Subject: [PATCH 09/11] Fix typo.

---
 lib/iris/cube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index 89afa78bde..10ca3e7e9a 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -5095,7 +5095,7 @@ def interpolate(

         """
         if self.is_dataless():
-            raise iris.exceptions.DatalessError("interoplate")
+            raise iris.exceptions.DatalessError("interpolate")
         coords, points = zip(*sample_points)
         interp = scheme.interpolator(self, coords)  # type: ignore[arg-type]
         return interp(points, collapse_scalar=collapse_scalar)

From e3eec6f6f4ef8a9be5cc7403e2e8faab158b3eb0 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Fri, 10 Oct 2025 14:16:12 +0100
Subject: [PATCH 10/11] Expand documentation.

---
 docs/src/further_topics/dataless_cubes.rst | 106 ++++++++++++++++++
 docs/src/further_topics/index.rst          |   1 +
 docs/src/whatsnew/latest.rst               |   8 ++
 .../tests/integration/merge/test_merge.py  |   3 +-
 4 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 docs/src/further_topics/dataless_cubes.rst

diff --git a/docs/src/further_topics/dataless_cubes.rst b/docs/src/further_topics/dataless_cubes.rst
new file mode 100644
index 0000000000..65c2317880
--- /dev/null
+++ b/docs/src/further_topics/dataless_cubes.rst
@@ -0,0 +1,106 @@
+.. _dataless-cubes:
+
+==============
+Dataless Cubes
+==============
+It is possible for a cube to exist without a data payload.
+In this case ``cube.data`` is ``None``, instead of containing an array (real or lazy) as
+usual.
+
+This can be useful when the cube is used purely as a placeholder for metadata, e.g. to
+represent a combination of coordinates.
+
+Most notably, dataless cubes can be used as the target "grid cube" for most regridding
+schemes, since in that case the cube's coordinates are all that the method uses.
+See also :meth:`iris.util.make_gridcube`.
+
+
+Properties of dataless cubes
+----------------------------
+
+* ``cube.shape`` is unchanged
+* ``cube.data`` == ``None``
+* ``cube.dtype`` == ``None``
+* ``cube.core_data()`` == ``cube.lazy_data()`` == ``None``
+* ``cube.is_dataless()`` == ``True``
+* ``cube.has_lazy_data()`` == ``False``
+
+
+Cube creation
+-------------
+You can create a dataless cube with the :meth:`~iris.cube.Cube` constructor
+(i.e. ``__init__`` call), by specifying the ``shape`` keyword in place of ``data``.
+If both are specified, an error is raised (even if data and shape are compatible).
+
+
+Data assignment
+---------------
+You can make an existing cube dataless, by setting ``cube.data = None``.
+The data array is simply discarded.
+
+
+Cube copy
+---------
+The syntax that allows you to replace data on copying,
+e.g. ``cube2 = cube.copy(new_data)``, has now been extended to accept the special value
+:data:`iris.DATALESS`.
+
+So, ``cube2 = cube.copy(iris.DATALESS)`` makes ``cube2`` a
+dataless copy of ``cube``.
+This is equivalent to ``cube2 = cube.copy(); cube2.data = None``.
+
+
+Save and Load
+-------------
+The netcdf file interface can save and re-load dataless cubes correctly.
+See : :ref:`netcdf_dataless`.
+
+
+.. _dataless_merge:
+
+Merging
+-------
+Merging is fully supported for dataless cubes, including combining them with "normal"
+cubes.
+
+* In all cases, the result has the same shape and metadata as if the same cubes had
+  data.
+* Merging multiple dataless cubes produces a dataless result.
+* Merging dataless and non-dataless cubes results in a partially 'missing' data array,
+  i.e. the relevant sections are filled with masked data.
+* Laziness is also preserved.
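+
+For illustration, a minimal sketch (assuming ``cube_a`` is a normal cube and
+``cube_b`` a dataless one, otherwise differing only in a scalar coordinate)::
+
+    merged = CubeList([cube_a, cube_b]).merge_cube()
+    print(merged.is_dataless())  # False : one input had data
+    print(merged[1].data)        # the dataless section is fully masked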
+
+
+Operations NOT supported
+-------------------------
+Dataless cubes are relatively new, and only partly integrated with Iris cube operations
+generally.
+
+The following are some of the notable features which do *not* support dataless cubes,
+at least as yet:
+
+* plotting
+
+* cube arithmetic
+
+* statistics
+
+* concatenation
+
+* :meth:`iris.cube.CubeList.realise_data`
+
+* various :class:`~iris.cube.Cube` methods, including at least:
+
+  * :meth:`~iris.cube.Cube.convert_units`
+
+  * :meth:`~iris.cube.Cube.subset`
+
+  * :meth:`~iris.cube.Cube.intersection`
+
+  * :meth:`~iris.cube.Cube.slices`
+
+  * :meth:`~iris.cube.Cube.interpolate`
+
+  * :meth:`~iris.cube.Cube.regrid`
+    Note: in this case the target ``grid`` can be dataless, but not the source
+    (``self``) cube.
diff --git a/docs/src/further_topics/index.rst b/docs/src/further_topics/index.rst
index 67cadc4612..883059a025 100644
--- a/docs/src/further_topics/index.rst
+++ b/docs/src/further_topics/index.rst
@@ -15,6 +15,7 @@ Extra information on specific technical issues.
    lenient_maths
    um_files_loading
    missing_data_handling
+   dataless_cubes
    netcdf_io
    dask_best_practices/index
    ugrid/index
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index 36d757e2f5..016b6db68e 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -40,6 +40,14 @@ This document explains the changes made to Iris for this release
    :func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib``
    compression. (:issue:`6565`, :pull:`6728`)

+#. `@pp-mo`_ added the ability to merge dataless cubes. This also means they can be
+   re-loaded normally with :meth:`iris.load`. See: :ref:`dataless_merge`.
+   (:issue:`5770`, :pull:`6581`)
+
+#. `@pp-mo`_ added a documentation section on dataless cubes.
+   See: :ref:`dataless-cubes`.
+   (:issue:`XXX`, :pull:`XXX`)
+

 🐛 Bugs Fixed
 =============
diff --git a/lib/iris/tests/integration/merge/test_merge.py b/lib/iris/tests/integration/merge/test_merge.py
index 39abc721af..026725dafe 100644
--- a/lib/iris/tests/integration/merge/test_merge.py
+++ b/lib/iris/tests/integration/merge/test_merge.py
@@ -53,8 +53,7 @@ def mangle_cubelist(cubelist, dataless_option):
             or dataless_option == "dataless_all"
         ):
             # Make this one dataless
-            cube = cube.copy()
-            cube.data = None
+            cube = cube.copy(iris.DATALESS)

         result.append(cube)

From e0c2cd8fbfbf59773636baac4d22f8f42954b864 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Fri, 10 Oct 2025 15:34:38 +0100
Subject: [PATCH 11/11] Fix broken ref + tweak whatsnew.

---
 docs/src/further_topics/dataless_cubes.rst | 3 ++-
 docs/src/whatsnew/latest.rst               | 5 +----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/docs/src/further_topics/dataless_cubes.rst b/docs/src/further_topics/dataless_cubes.rst
index 65c2317880..f63cb65dc6 100644
--- a/docs/src/further_topics/dataless_cubes.rst
+++ b/docs/src/further_topics/dataless_cubes.rst
@@ -53,7 +53,8 @@ Save and Load
 -------------
 The netcdf file interface can save and re-load dataless cubes correctly.
-See : :ref:`netcdf_dataless`.
+TODO: link to ref e.g. "netcdf_dataless" in netcdf docs,
+when #6339 "Dataless netcdf save+load" is in place.


 .. _dataless_merge:

diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index 016b6db68e..0834702fbe 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -42,12 +42,9 @@ This document explains the changes made to Iris for this release
 #. `@pp-mo`_ added the ability to merge dataless cubes. This also means they can be
    re-loaded normally with :meth:`iris.load`. See: :ref:`dataless_merge`.
+   Also added a new documentation section on dataless cubes.
    (:issue:`5770`, :pull:`6581`)

-#. `@pp-mo`_ added a documentation section on dataless cubes.
-   See: :ref:`dataless-cubes`.
-   (:issue:`XXX`, :pull:`XXX`)
-
 🐛 Bugs Fixed
 =============
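Note: to close, the idiom that the new documentation describes, as a final sketch:
a dataless copy made with iris.DATALESS keeps coordinates and shape, which is
exactly what a regridding target needs.

    import iris
    import numpy as np
    from iris.cube import Cube

    cube = Cube(np.zeros((2, 3)), long_name="template")
    grid = cube.copy(iris.DATALESS)  # dataless copy : coords and shape kept
    assert grid.is_dataless()
    assert grid.shape == (2, 3)
    # e.g. usable as the target "grid cube" for most regridding schemes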