diff --git a/.isort.cfg b/.isort.cfg
index 52ac204c..0f1f7e33 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,5 +1,5 @@
 [settings]
-known_third_party = dask,fsspec,numcodecs,numpy,pytest,scipy,skimage,zarr
+known_third_party = dask,numcodecs,numpy,pytest,scipy,skimage,zarr
 multi_line_output = 3
 include_trailing_comma = True
 force_grid_wrap = 0
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 7f120630..ecef8025 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.12"
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
diff --git a/README.rst b/README.rst
index 9a9fd538..19c04947 100644
--- a/README.rst
+++ b/README.rst
@@ -6,7 +6,7 @@ ome-zarr-py
 Tools for multi-resolution images stored in Zarr filesets, according to the `OME NGFF spec`_.
 
-See `Readthedocs `_ for usage information.
+See `Documentation `_ for usage information.
 
 Documentation
 -------------
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 43edcccf..15541c48 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,7 @@
-sphinx==7.1.2
+sphinx==8.1.3
 sphinx-rtd-theme==3.0.2
 fsspec
-zarr
+zarr>=3.0.0
 dask
 numpy
 scipy
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 66f7a665..608c5993 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -19,11 +19,11 @@ Use the `ome_zarr` command to interrogate Zarr datasets.
 
 Remote data::
 
-    ome_zarr info https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/
+    ome_zarr info https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr/
 
 Local data::
 
-    ome_zarr info 6001240.zarr/
+    ome_zarr info 6001240_labels.zarr/
 
 view
 ====
@@ -47,11 +47,11 @@ download
 To download all the resolutions and metadata for an image use ``ome_zarr download``.
-This creates ``6001240.zarr`` locally::
+This creates ``6001240_labels.zarr`` locally::
 
-    ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/
+    ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr
 
 Specify a different output directory::
 
-    ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/ --output image_dir
+    ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr --output image_dir
 
 create
 ======
@@ -59,13 +59,15 @@ create
 
-To create sample OME-Zarr image from the `skimage `_ data.
+To create a sample OME-Zarr image from the `skimage `_ data.
 
-Create an OME-Zarr image in coinsdata/ dir::
+Create an OME-Zarr image in a coinsdata.zarr dir, using the 'coins' method, in the latest OME-Zarr version or in v0.4::
 
-    ome_zarr create coinsdata
+    ome_zarr create coinsdata.zarr
+
+    ome_zarr create coinsdata.zarr --format 0.4
 
 Create an rgb image from skimage astronaut in testimage dir::
 
-    ome_zarr create testimage --method=astronaut
+    ome_zarr create testimage.zarr --method=astronaut
 
 csv to labels
 =============
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a27693c4..8dacb7c1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -4,6 +4,8 @@ ome-zarr-py
 Tools for reading and writing multi-resolution images stored in Zarr filesets, according to the `OME NGFF spec`_.
 
+NB: The default version of OME-Zarr written by ``ome-zarr-py`` is ``v0.5``, which uses ``zarr v3``. OME-Zarr v0.5
+is not yet supported by all OME-Zarr tools. See the documentation for more information on how to write other versions.
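+
+For example, a minimal sketch of choosing the version at write time (``parse_url`` and
+``FormatV04`` are covered in the Python section; the paths here are hypothetical)::
+
+    from ome_zarr.format import FormatV04
+    from ome_zarr.io import parse_url
+
+    # default: OME-Zarr v0.5 in a zarr v3 store
+    store = parse_url("image.zarr", mode="w").store
+
+    # opt in to OME-Zarr v0.4 in a zarr v2 store
+    store_v04 = parse_url("image_v04.zarr", mode="w", fmt=FormatV04()).store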
 
 Features
 --------
diff --git a/docs/source/python.rst b/docs/source/python.rst
index 9a4c7840..c98e64f6 100644
--- a/docs/source/python.rst
+++ b/docs/source/python.rst
@@ -13,13 +13,18 @@ of 2 in the X and Y dimensions.
 Alternatively, the :py:func:`ome_zarr.writer.write_multiscale` can be used, which takes a
 "pyramid" of pre-computed `numpy` arrays.
 
+The default version of OME-NGFF is v0.5, which is based on Zarr v3. A Zarr v3 store is created
+by `parse_url()` below. To write OME-NGFF v0.4 (Zarr v2), use the `fmt=FormatV04()` argument
+in `parse_url()`, which will create a Zarr v2 store.
+
 The following code creates a 3D Image in OME-Zarr::
 
     import numpy as np
     import zarr
 
     from ome_zarr.io import parse_url
-    from ome_zarr.writer import write_image
+    from ome_zarr.format import FormatV04
+    from ome_zarr.writer import write_image, add_metadata
 
     path = "test_ngff_image.zarr"
@@ -28,10 +33,11 @@ The following code creates a 3D Image in OME-Zarr::
     rng = np.random.default_rng(0)
     data = rng.poisson(lam=10, size=(size_z, size_xy, size_xy)).astype(np.uint8)
 
-    # write the image data
+    # Use fmt=FormatV04() to write v0.4 format (zarr v2)
     store = parse_url(path, mode="w").store
     root = zarr.group(store=store)
-    write_image(image=data, group=root, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy)))
+    write_image(image=data, group=root, axes="zyx",
+                storage_options=dict(chunks=(1, size_xy, size_xy)))
 
 
 This image can be viewed in `napari` using the
@@ -41,18 +47,18 @@ This image can be viewed in `napari` using the
 Rendering settings
 ------------------
 
-Render settings can be added to an existing zarr group::
+Rendering settings can be added to an existing zarr group::
 
     store = parse_url(path, mode="w").store
     root = zarr.group(store=store)
-    root.attrs["omero"] = {
+    add_metadata(root, {"omero": {
         "channels": [{
             "color": "00FFFF",
             "window": {"start": 0, "end": 20, "min": 0, "max": 255},
             "label": "random",
             "active": True,
         }]
-    }
+    }})
 
 Writing labels
 --------------
@@ -64,10 +70,11 @@ The following code creates a 3D Image in OME-Zarr with labels::
     import os
 
     from skimage.data import binary_blobs
+    from ome_zarr.format import FormatV04
     from ome_zarr.io import parse_url
-    from ome_zarr.writer import write_image
+    from ome_zarr.writer import write_image, add_metadata
 
-    path = "test_ngff_image.zarr"
+    path = "test_ngff_image_labels.zarr"
     os.mkdir(path)
 
     mean_val=10
@@ -76,19 +83,20 @@ The following code creates a 3D Image in OME-Zarr with labels::
     rng = np.random.default_rng(0)
     data = rng.poisson(mean_val, size=(size_z, size_xy, size_xy)).astype(np.uint8)
 
-    # write the image data
+    # Use fmt=FormatV04() to write v0.4 format (zarr v2)
     store = parse_url(path, mode="w").store
     root = zarr.group(store=store)
-    write_image(image=data, group=root, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy)))
+    write_image(image=data, group=root, axes="zyx",
+                storage_options=dict(chunks=(1, size_xy, size_xy)))
 
     # optional rendering settings
-    root.attrs["omero"] = {
+    add_metadata(root, {"omero": {
         "channels": [{
             "color": "00FFFF",
             "window": {"start": 0, "end": 20, "min": 0, "max": 255},
             "label": "random",
             "active": True,
         }]
-    }
+    }})
 
     # add labels...
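+
+Note that ``add_metadata`` merges the given keys into any existing metadata (for OME-Zarr
+v0.5 everything is stored under the ``ome`` namespace in ``zarr.json``). A minimal sketch of
+reading the settings back, assuming the ``get_metadata`` helper introduced alongside it::
+
+    from ome_zarr.writer import get_metadata
+
+    # returns the dict stored under "ome" for v0.5, or the plain attributes for v0.4
+    omero = get_metadata(root).get("omero", {})
+    print(omero["channels"][0]["label"])  # "random"
+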
@@ -104,18 +112,19 @@ The following code creates a 3D Image in OME-Zarr with labels:: labels_grp = root.create_group("labels") # the 'labels' .zattrs lists the named labels data label_name = "blobs" - labels_grp.attrs["labels"] = [label_name] + add_metadata(labels_grp, {"labels": [label_name]}) label_grp = labels_grp.create_group(label_name) - # need 'image-label' attr to be recognized as label - label_grp.attrs["image-label"] = { + write_image(label, label_grp, axes="zyx") + + # we need 'image-label' attr to be recognized as label + add_metadata(label_grp, {"image-label": { "colors": [ {"label-value": 1, "rgba": [255, 0, 0, 255]}, {"label-value": 2, "rgba": [0, 255, 0, 255]}, {"label-value": 3, "rgba": [255, 255, 0, 255]} ] - } + }}) - write_image(label, label_grp, axes="zyx") Writing HCS datasets to OME-NGFF -------------------------------- @@ -125,6 +134,7 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu import numpy as np import zarr + from ome_zarr.format import FormatV04 from ome_zarr.io import parse_url from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata @@ -144,6 +154,7 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu data = rng.poisson(mean_val, size=(num_wells, num_fields, size_z, size_xy, size_xy)).astype(np.uint8) # write the plate of images and corresponding metadata + # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2) store = parse_url(path, mode="w").store root = zarr.group(store=store) write_plate_metadata(root, row_names, col_names, well_paths) @@ -154,7 +165,8 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu write_well_metadata(well_group, field_paths) for fi, field in enumerate(field_paths): image_group = well_group.require_group(str(field)) - write_image(image=data[wi, fi], group=image_group, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy))) + write_image(image=data[wi, fi], group=image_group, axes="zyx", + storage_options=dict(chunks=(1, size_xy, size_xy))) This image can be viewed in `napari` using the @@ -177,11 +189,9 @@ the data is available as `dask` arrays:: from ome_zarr.reader import Reader import napari - url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr" + url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr" # read the image data - store = parse_url(url, mode="r").store - reader = Reader(parse_url(url)) # nodes may include images, labels etc nodes = list(reader()) @@ -207,6 +217,7 @@ Writing big image from tiles:: import os import zarr from ome_zarr.io import parse_url + from ome_zarr.format import CurrentFormat, FormatV04 from ome_zarr.reader import Reader from ome_zarr.writer import write_multiscales_metadata from ome_zarr.dask_utils import resize as da_resize @@ -214,19 +225,24 @@ Writing big image from tiles:: import dask.array as da from math import ceil + fmt = CurrentFormat() + # Use fmt=FormatV04() to write v0.4 format (zarr v2) + url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.3/9836842.zarr" reader = Reader(parse_url(url)) nodes = list(reader()) # first level of the pyramid dask_data = nodes[0].data[0] tile_size = 512 + axes = [{"name": "c", "type": "channel"}, {"name": "y", "type": "space"}, {"name": "x", "type": "space"}] def downsample_pyramid_on_disk(parent, paths): """ Takes a high-resolution Zarr array at paths[0] in the zarr group and down-samples it by a factor of 2 for each of the other paths """ - group_path = 
parent.store.path
+    group_path = str(parent.store_path)
+    img_path = parent.store_path / parent.path
     image_path = os.path.join(group_path, parent.path)
     print("downsample_pyramid_on_disk", image_path)
     for count, path in enumerate(paths[1:]):
@@ -246,10 +262,16 @@ Writing big image from tiles::
             dask_image, tuple(dims), preserve_range=True, anti_aliasing=False
         )
+        options = {}
+        if fmt.zarr_format == 2:
+            options["dimension_separator"] = "/"
+        else:
+            options["chunk_key_encoding"] = fmt.chunk_key_encoding
+            options["dimension_names"] = [axis["name"] for axis in axes]
         # write to disk
         da.to_zarr(
-            arr=output, url=image_path, component=path,
-            dimension_separator=parent._store._dimension_separator,
+            arr=output, url=img_path, component=path,
+            zarr_format=fmt.zarr_format, **options
         )
     return paths
@@ -270,16 +292,18 @@ Writing big image from tiles::
     row_count = ceil(shape[-2]/tile_size)
     col_count = ceil(shape[-1]/tile_size)
 
-    store = parse_url("9836842.zarr", mode="w").store
+    store = parse_url("9836842.zarr", mode="w", fmt=fmt).store
     root = zarr.group(store=store)
 
     # create empty array at root of pyramid
-    zarray = root.require_dataset(
+    zarray = root.require_array(
         "0",
         shape=shape,
         exact=True,
         chunks=chunks,
         dtype=d_type,
+        chunk_key_encoding=fmt.chunk_key_encoding,
+        dimension_names=[axis["name"] for axis in axes],  # omit for v0.4
     )
 
     print("row_count", row_count, "col_count", col_count)
@@ -296,7 +320,6 @@ Writing big image from tiles::
             zarray[ch_index, y1:y2, x1:x2] = tile
 
     paths = ["0", "1", "2"]
-    axes = [{"name": "c", "type": "channel"}, {"name": "y", "type": "space"}, {"name": "x", "type": "space"}]
     # We have "0" array. This downsamples (in X and Y dims only) to create "1" and "2"
     downsample_pyramid_on_disk(root, paths)
@@ -313,7 +336,8 @@ Writing big image from tiles::
 
     write_multiscales_metadata(root, datasets, axes=axes)
 
-Using dask to fetch::
+Using dask to fetch tiles lazily. Here we concatenate lazy "delayed" sources of tiles into a
+full image. When that dask array is passed to write_image(), the tiles are loaded on the fly::
 
     # Created for https://forum.image.sc/t/writing-tile-wise-ome-zarr-with-pyramid-size/85063
 
@@ -323,9 +347,11 @@ Using dask to fetch::
     from dask import delayed
 
     from ome_zarr.io import parse_url
-    from ome_zarr.writer import write_image, write_multiscales_metadata
+    from ome_zarr.format import FormatV04
+    from ome_zarr.writer import write_image, add_metadata
 
     zarr_name = "test_dask.zarr"
+    # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2)
     store = parse_url(zarr_name, mode="w").store
     root = zarr.group(store=store)
 
@@ -374,7 +400,7 @@ Using dask to fetch::
     # This will create a downsampled 'multiscales' pyramid
     write_image(dask_data, root, axes="czyx")
 
-    root.attrs["omero"] = {
+    add_metadata(root, {"omero": {
         "channels": [
             {
                 "color": "FF0000",
@@ -389,7 +415,7 @@ Using dask to fetch::
                 "active": True,
             },
         ]
-    }
+    }})
 
     print("Created image. 
Open with...") print(f"ome_zarr view {zarr_name}") diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py index 3d88083f..c95ae9e6 100644 --- a/ome_zarr/cli.py +++ b/ome_zarr/cli.py @@ -6,6 +6,7 @@ from .csv import csv_to_zarr from .data import astronaut, coins, create_zarr +from .format import CurrentFormat, Format, format_from_version from .scale import Scaler from .utils import download as zarr_download from .utils import finder as bff_finder @@ -63,7 +64,11 @@ def create(args: argparse.Namespace) -> None: label_name = "circles" else: raise Exception(f"unknown method: {args.method}") - create_zarr(args.path, method=method, label_name=label_name) + fmt: Format = CurrentFormat() + if args.format: + fmt = format_from_version(args.format) + + create_zarr(args.path, method=method, label_name=label_name, fmt=fmt) def scale(args: argparse.Namespace) -> None: @@ -147,6 +152,9 @@ def main(args: list[str] | None = None) -> None: "--method", choices=("coins", "astronaut"), default="coins" ) parser_create.add_argument("path") + parser_create.add_argument( + "--format", help="OME-Zarr version to create. e.g. '0.4'" + ) parser_create.set_defaults(func=create) parser_scale = subparsers.add_parser("scale") diff --git a/ome_zarr/data.py b/ome_zarr/data.py index 33f766e2..0e696bc7 100644 --- a/ome_zarr/data.py +++ b/ome_zarr/data.py @@ -15,7 +15,7 @@ from .format import CurrentFormat, Format from .io import parse_url from .scale import Scaler -from .writer import write_multiscale +from .writer import add_metadata, write_multiscale CHANNEL_DIMENSION = 1 @@ -127,7 +127,7 @@ def create_zarr( """Generate a synthetic image pyramid with labels.""" pyramid, labels = method() - loc = parse_url(zarr_directory, mode="w") + loc = parse_url(zarr_directory, mode="w", fmt=fmt) assert loc grp = zarr.group(loc.store) axes = None @@ -162,6 +162,7 @@ def create_zarr( { "window": {"start": 0, "end": 255, "min": 0, "max": 255}, "color": "FF0000", + "active": True, } ], "rdefs": {"model": "greyscale"}, @@ -196,17 +197,18 @@ def create_zarr( axes=axes, storage_options=storage_options, metadata={"omero": image_data}, + fmt=fmt, ) if labels: labels_grp = grp.create_group("labels") - labels_grp.attrs["labels"] = [label_name] + add_metadata(labels_grp, {"labels": [label_name]}) label_grp = labels_grp.create_group(label_name) if axes is not None: # remove channel axis for masks axes = axes.replace("c", "") - write_multiscale(labels, label_grp, axes=axes) + write_multiscale(labels, label_grp, axes=axes, fmt=fmt) colors = [] properties = [] @@ -214,11 +216,16 @@ def create_zarr( rgba = [randrange(0, 256) for i in range(4)] colors.append({"label-value": x, "rgba": rgba}) properties.append({"label-value": x, "class": f"class {x}"}) - label_grp.attrs["image-label"] = { - "version": fmt.version, - "colors": colors, - "properties": properties, - "source": {"image": "../../"}, - } + add_metadata( + label_grp, + { + "image-label": { + "version": fmt.version, + "colors": colors, + "properties": properties, + "source": {"image": "../../"}, + } + }, + ) return grp diff --git a/ome_zarr/format.py b/ome_zarr/format.py index e0fba096..40170ac1 100644 --- a/ome_zarr/format.py +++ b/ome_zarr/format.py @@ -3,9 +3,9 @@ import logging from abc import ABC, abstractmethod from collections.abc import Iterator -from typing import Any +from typing import Any, Dict -from zarr.storage import FSStore +from zarr.storage import FsspecStore, LocalStore LOGGER = logging.getLogger("ome_zarr.format") @@ -25,6 +25,7 @@ def format_implementations() -> Iterator["Format"]: 
""" Return an instance of each format implementation, newest to oldest. """ + yield FormatV05() yield FormatV04() yield FormatV03() yield FormatV02() @@ -55,12 +56,22 @@ class Format(ABC): def version(self) -> str: # pragma: no cover raise NotImplementedError() + @property + @abstractmethod + def zarr_format(self) -> int: # pragma: no cover + raise NotImplementedError() + + @property + @abstractmethod + def chunk_key_encoding(self) -> Dict[str, str]: # pragma: no cover + raise NotImplementedError() + @abstractmethod def matches(self, metadata: dict) -> bool: # pragma: no cover raise NotImplementedError() @abstractmethod - def init_store(self, path: str, mode: str = "r") -> FSStore: + def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore: raise NotImplementedError() # @abstractmethod @@ -129,14 +140,35 @@ class FormatV01(Format): def version(self) -> str: return "0.1" + @property + def zarr_format(self) -> int: + return 2 + + @property + def chunk_key_encoding(self) -> Dict[str, str]: + return {"name": "v2", "separator": "."} + def matches(self, metadata: dict) -> bool: version = self._get_metadata_version(metadata) LOGGER.debug("%s matches %s?", self.version, version) return version == self.version - def init_store(self, path: str, mode: str = "r") -> FSStore: - store = FSStore(path, mode=mode, dimension_separator=".") - LOGGER.debug("Created legacy flat FSStore(%s, %s)", path, mode) + def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore: + """ + Not ideal. Stores should remain hidden + "dimension_separator" is specified at array creation time + """ + + if path.startswith(("http", "s3")): + store = FsspecStore.from_url( + path, + storage_options=None, + read_only=(mode in ("r", "r+", "a")), + ) + else: + # No other kwargs supported + store = LocalStore(path, read_only=(mode in ("r", "r+", "a"))) + LOGGER.debug("Created nested FsspecStore(%s, %s)", path, mode) return store def generate_well_dict( @@ -180,31 +212,9 @@ class FormatV02(FormatV01): def version(self) -> str: return "0.2" - def init_store(self, path: str, mode: str = "r") -> FSStore: - """ - Not ideal. Stores should remain hidden - TODO: could also check dimension_separator - """ - - kwargs = { - "dimension_separator": "/", - "normalize_keys": False, - } - - mkdir = True - if "r" in mode or path.startswith(("http", "s3")): - # Could be simplified on the fsspec side - mkdir = False - if mkdir: - kwargs["auto_mkdir"] = True - - store = FSStore( - path, - mode=mode, - **kwargs, - ) # TODO: open issue for using Path - LOGGER.debug("Created nested FSStore(%s, %s, %s)", path, mode, kwargs) - return store + @property + def chunk_key_encoding(self) -> Dict[str, str]: + return {"name": "v2", "separator": "/"} class FormatV03(FormatV02): # inherits from V02 to avoid code duplication @@ -343,4 +353,23 @@ def validate_coordinate_transformations( ) -CurrentFormat = FormatV04 +class FormatV05(FormatV04): + """ + Changelog: added FormatV05 (May 2025): writing not supported yet + """ + + @property + def version(self) -> str: + return "0.5" + + @property + def zarr_format(self) -> int: + return 3 + + @property + def chunk_key_encoding(self) -> Dict[str, str]: + # this is default for Zarr v3. Could return None? + return {"name": "default", "separator": "/"} + + +CurrentFormat = FormatV05 diff --git a/ome_zarr/io.py b/ome_zarr/io.py index 3a4d2fd2..9af1b7e2 100644 --- a/ome_zarr/io.py +++ b/ome_zarr/io.py @@ -3,13 +3,13 @@ Primary entry point is the :func:`~ome_zarr.io.parse_url` method. 
""" -import json import logging from pathlib import Path from urllib.parse import urljoin import dask.array as da -from zarr.storage import FSStore +import zarr +from zarr.storage import FsspecStore, LocalStore, StoreLike from .format import CurrentFormat, Format, detect_format from .types import JSONDict @@ -19,7 +19,7 @@ class ZarrLocation: """ - IO primitive for reading and writing Zarr data. Uses FSStore for all + IO primitive for reading and writing Zarr data. Uses a store for all data access. No assumptions about the existence of the given path string are made. @@ -28,7 +28,7 @@ class ZarrLocation: def __init__( self, - path: Path | str | FSStore, + path: StoreLike, mode: str = "r", fmt: Format = CurrentFormat(), ) -> None: @@ -39,18 +39,21 @@ def __init__( self.__path = str(path.resolve()) elif isinstance(path, str): self.__path = path - elif isinstance(path, FSStore): + elif isinstance(path, FsspecStore): self.__path = path.path + elif isinstance(path, LocalStore): + self.__path = str(path.root) else: raise TypeError(f"not expecting: {type(path)}") loader = fmt if loader is None: loader = CurrentFormat() - self.__store: FSStore = ( - path if isinstance(path, FSStore) else loader.init_store(self.__path, mode) + self.__store: FsspecStore = ( + path + if isinstance(path, (FsspecStore, LocalStore)) + else loader.init_store(self.__path, mode) ) - self.__init_metadata() detected = detect_format(self.__metadata, loader) LOGGER.debug("ZarrLocation.__init__ %s detected: %s", path, detected) @@ -66,16 +69,35 @@ def __init_metadata(self) -> None: """ Load the Zarr metadata files for the given location. """ - self.zarray: JSONDict = self.get_json(".zarray") - self.zgroup: JSONDict = self.get_json(".zgroup") + self.zgroup: JSONDict = {} + self.zarray: JSONDict = {} self.__metadata: JSONDict = {} self.__exists: bool = True - if self.zgroup: - self.__metadata = self.get_json(".zattrs") - elif self.zarray: - self.__metadata = self.get_json(".zattrs") - else: - self.__exists = False + # If we want to *create* a new zarr v2 group, we need to specify + # zarr_format. This is not needed for reading. + zarr_format = None + try: + # this group is used to get zgroup metadata + # used for info, download, Spec.match() via root_attrs() etc. + # and to check if the group exists for reading. Only need "r" mode for this. + group = zarr.open_group( + store=self.__store, path="/", mode="r", zarr_format=zarr_format + ) + self.zgroup = group.attrs.asdict() + # For zarr v3, everything is under the "ome" namespace + if "ome" in self.zgroup: + self.zgroup = self.zgroup["ome"] + self.__metadata = self.zgroup + except (ValueError, FileNotFoundError): + # group doesn't exist. If we are in "w" mode, we need to create it. + if self.__mode == "w": + # If we are creating a new group, we need to specify the zarr_format. 
+ zarr_format = self.__fmt.zarr_format + group = zarr.open_group( + store=self.__store, path="/", mode="w", zarr_format=zarr_format + ) + else: + self.__exists = False def __repr__(self) -> str: """Print the path as well as whether this is a group or an array.""" @@ -98,12 +120,17 @@ def fmt(self) -> Format: def mode(self) -> str: return self.__mode + @property + def version(self) -> str: + """Return the version of the OME-NGFF spec used for this location.""" + return self.__fmt.version + @property def path(self) -> str: return self.__path @property - def store(self) -> FSStore: + def store(self) -> FsspecStore: """Return the initialized store for this location""" assert self.__store is not None return self.__store @@ -144,26 +171,6 @@ def create(self, path: str) -> "ZarrLocation": LOGGER.debug("open(%s(%s))", self.__class__.__name__, subpath) return self.__class__(subpath, mode=self.__mode, fmt=self.__fmt) - def get_json(self, subpath: str) -> JSONDict: - """ - Load and return a given subpath of store as JSON. - - HTTP 403 and 404 responses are treated as if the file does not exist. - Exceptions during the remote connection are logged at the WARN level. - All other exceptions log at the ERROR level. - """ - try: - data = self.__store.get(subpath) - if not data: - return {} - return json.loads(data) - except KeyError: - LOGGER.debug("JSON not found: %s", subpath) - return {} - except Exception: - LOGGER.exception("Error while loading JSON") - return {} - def parts(self) -> list[str]: if self._isfile(): return list(Path(self.__path).parts) @@ -192,10 +199,7 @@ def _isfile(self) -> bool: Return whether the current underlying implementation points to a local file or not. """ - return self.__store.fs.protocol == "file" or self.__store.fs.protocol == ( - "file", - "local", - ) + return isinstance(self.__store, LocalStore) def _ishttp(self) -> bool: """ @@ -220,13 +224,8 @@ def parse_url( >>> parse_url('does-not-exist') """ - try: - loc = ZarrLocation(path, mode=mode, fmt=fmt) - if "r" in mode and not loc.exists(): - return None - else: - return loc - except Exception: - LOGGER.exception("exception on parsing (stacktrace at DEBUG)") - LOGGER.debug("stacktrace:", exc_info=True) + loc = ZarrLocation(path, mode=mode, fmt=fmt) + if "r" in mode and not loc.exists(): return None + else: + return loc diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 628b0549..4430eb08 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -53,9 +53,7 @@ def __init__( self.specs.append(Multiscales(self)) if OMERO.matches(zarr): self.specs.append(OMERO(self)) - if plate_labels: - self.specs.append(PlateLabels(self)) - elif Plate.matches(zarr): + if Plate.matches(zarr): self.specs.append(Plate(self)) # self.add(zarr, plate_labels=True) if Well.matches(zarr): @@ -299,7 +297,7 @@ def __init__(self, node: Node) -> None: LOGGER.info("datasets %s", datasets) for resolution in self.datasets: - data: da.core.Array = self.array(resolution, version) + data: da.core.Array = self.array(resolution) chunk_sizes = [ str(c[0]) + (f" (+ {c[-1]})" if c[-1] != c[0] else "") for c in data.chunks @@ -320,7 +318,7 @@ def __init__(self, node: Node) -> None: if child_zarr.exists(): node.add(child_zarr, visibility=False) - def array(self, resolution: str, version: str) -> da.core.Array: + def array(self, resolution: str) -> da.core.Array: # data.shape is (t, c, z, y, x) by convention return self.zarr.load(resolution) @@ -562,51 +560,6 @@ def get_tile(row: int, col: int) -> da.core.Array: return da.concatenate(lazy_rows, 
axis=len(self.axes) - 2) -class PlateLabels(Plate): - def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover - """251.zarr/A/1/0/labels/0/3/""" - path = ( - f"{self.row_names[row]}/{self.col_names[col]}/" - f"{self.first_field_path}/labels/0/{level}" - ) - return path - - def get_pyramid_lazy(self, node: Node) -> None: # pragma: no cover - super().get_pyramid_lazy(node) - # pyramid data may be multi-channel, but we only have 1 labels channel - # TODO: when PlateLabels are re-enabled, update the logic to handle - # 0.4 axes (list of dictionaries) - if "c" in self.axes: - c_index = self.axes.index("c") - idx = [slice(None)] * len(self.axes) - idx[c_index] = slice(0, 1) - node.data[0] = node.data[0][tuple(idx)] - # remove image metadata - node.metadata = {} - - # combine 'properties' from each image - # from https://github.com/ome/ome-zarr-py/pull/61/ - properties: dict[int, dict[str, Any]] = {} - for row in self.row_names: - for col in self.col_names: - path = f"{row}/{col}/{self.first_field_path}/labels/0/.zattrs" - labels_json = self.zarr.get_json(path).get("image-label", {}) - # NB: assume that 'label_val' is unique across all images - props_list = labels_json.get("properties", []) - if props_list: - for props in props_list: - label_val = props["label-value"] - properties[label_val] = dict(props) - del properties[label_val]["label-value"] - node.metadata["properties"] = properties - - def get_numpy_type(self, image_node: Node) -> np.dtype: # pragma: no cover - # FIXME - don't assume Well A1 is valid - path = self.get_tile_path(0, 0, 0) - label_zarr = self.zarr.load(path) - return label_zarr.dtype - - class Reader: """Parses the given Zarr instance into a collection of Nodes properly ordered depending on context. diff --git a/ome_zarr/scale.py b/ome_zarr/scale.py index 0f39750b..0fb3e85e 100644 --- a/ome_zarr/scale.py +++ b/ome_zarr/scale.py @@ -138,7 +138,7 @@ def __assert_values(self, pyramid: list[np.ndarray]) -> None: def __create_group( self, store: MutableMapping, base: np.ndarray, pyramid: list[np.ndarray] - ) -> zarr.hierarchy.Group: + ) -> zarr.Group: """Create group and datasets.""" grp = zarr.group(store) grp.create_dataset("base", data=base) diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py index 395e713d..3d58459d 100644 --- a/ome_zarr/utils.py +++ b/ome_zarr/utils.py @@ -21,9 +21,10 @@ import zarr from dask.diagnostics import ProgressBar +from .format import format_from_version from .io import parse_url from .reader import Multiscales, Node, Reader -from .types import JSONDict +from .types import Any, JSONDict LOGGER = logging.getLogger("ome_zarr.utils") @@ -54,6 +55,11 @@ def info(path: str, stats: bool = False) -> Iterator[Node]: continue print(node) + loc = node.zarr + version = loc.zgroup.get("version") + if version is None: + version = loc.zgroup.get("multiscales", [{}])[0].get("version", "") + print(" - version:", version) print(" - metadata") for spec in node.specs: print(f" - {spec.__class__.__name__}") @@ -72,7 +78,9 @@ def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None: # dry_run is for testing, so we don't open the browser or start the server zarrs = [] - if (Path(input_path) / ".zattrs").exists(): + if (Path(input_path) / ".zattrs").exists() or ( + Path(input_path) / "zarr.json" + ).exists(): zarrs = find_multiscales(Path(input_path)) if len(zarrs) == 0: print( @@ -120,9 +128,18 @@ def find_multiscales(path_to_zattrs): # We want full path to find the multiscales Image. e.g. 
full/path/to/image.zarr/0
     # AND we want image Name, e.g. "image.zarr Series 0"
     # AND we want the dir path to use for Tags e.g. full/path/to
-    with open(path_to_zattrs / ".zattrs") as f:
-        text = f.read()
+    text = None
+    for name in (".zattrs", "zarr.json"):
+        if (Path(path_to_zattrs) / name).exists():
+            with open(path_to_zattrs / name) as f:
+                text = f.read()
+            break
+    if text is None:
+        print(f"No .zattrs or zarr.json found in {path_to_zattrs}")
+        return []
     zattrs = json.loads(text)
+    if "attributes" in zattrs and "ome" in zattrs["attributes"]:
+        zattrs = zattrs["attributes"]["ome"]
     if "plate" in zattrs:
         plate = zattrs.get("plate")
         wells = plate.get("wells")
@@ -208,11 +225,11 @@ def finder(input_path: str, port: int = 8000, dry_run=False) -> None:
 
     # walk the input path to find all .zattrs files...
     def walk(path: Path):
-        if (path / ".zattrs").exists():
+        if (path / ".zattrs").exists() or (path / "zarr.json").exists():
             yield from find_multiscales(path)
         else:
             for p in path.iterdir():
-                if (p / ".zattrs").exists():
+                if (p / ".zattrs").exists() or (p / "zarr.json").exists():
                     yield from find_multiscales(p)
                 elif p.is_dir():
                     yield from walk(p)
@@ -322,26 +339,55 @@ def download(input_path: str, output_dir: str = ".") -> None:
             target_path = output_path / Path(*path)
             target_path.mkdir(parents=True)
 
-            with (target_path / ".zgroup").open("w") as f:
+            # choose v2 (.zgroup/.zattrs) or v3 (zarr.json) layout based on
+            # the version of the source data
+            version = node.zarr.version
+            fmt = format_from_version(version)
+
+            group_file = "zarr.json"
+            attrs_file = "zarr.json"
+            if fmt.zarr_format == 2:
+                group_file = ".zgroup"
+                attrs_file = ".zattrs"
+
+            with (target_path / group_file).open("w") as f:
                 f.write(json.dumps(node.zarr.zgroup))
-            with (target_path / ".zattrs").open("w") as f:
+            with (target_path / attrs_file).open("w") as f:
                 metadata: JSONDict = {}
                 node.write_metadata(metadata)
+                if fmt.zarr_format == 3:
+                    # For zarr v3, we need to put metadata under "ome" namespace
+                    metadata = {
+                        "attributes": {"ome": metadata},
+                        "zarr_format": 3,
+                        "node_type": "group",
+                    }
                 f.write(json.dumps(metadata))
 
             resolutions: list[da.core.Array] = []
             datasets: list[str] = []
+
             for spec in node.specs:
                 if isinstance(spec, Multiscales):
                     datasets = spec.datasets
                     resolutions = node.data
+            options: dict[str, Any] = {}
+            if fmt.zarr_format == 2:
+                options["dimension_separator"] = "/"
+            else:
+                options["chunk_key_encoding"] = fmt.chunk_key_encoding
+                options["dimension_names"] = [
+                    axis["name"] for axis in node.metadata["axes"]
+                ]
             if datasets and resolutions:
                 pbar = ProgressBar()
                 for dataset, data in reversed(list(zip(datasets, resolutions))):
                     LOGGER.info("resolution %s...", dataset)
                     with pbar:
                         data.to_zarr(
-                            str(target_path / dataset), dimension_separator="/"
+                            str(target_path / dataset),
+                            zarr_format=fmt.zarr_format,
+                            **options,
                         )
             else:
                 # Assume a group that needs metadata, like labels
diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py
index e6e539bc..df836ac4 100644
--- a/ome_zarr/writer.py
+++ b/ome_zarr/writer.py
@@ -10,9 +10,10 @@
 import numpy as np
 import zarr
 from dask.graph_manipulation import bind
+from numcodecs import Blosc
 
 from .axes import Axes
-from .format import CurrentFormat, Format
+from .format import CurrentFormat, Format, FormatV04
 from .scale import Scaler
 from .types import JSONDict
 
@@ -171,11 +172,37 @@ def _validate_plate_wells(
     return validated_wells
 
 
+def _blosc_compressor() -> Blosc:
+    """Return a Blosc compressor with zstd compression"""
+    return Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE)
+
+
+def 
check_format( + group: zarr.Group, + fmt: Format | None = None, +) -> Format: + """Check if the format is valid for the given group""" + + zarr_format = group.info._zarr_format + if fmt is not None: + if fmt.zarr_format != zarr_format: + raise ValueError( + f"Group is zarr_format: {zarr_format} but OME-Zarr {fmt.version} is {fmt.zarr_format}" + ) + else: + if zarr_format == 2: + fmt = FormatV04() + elif zarr_format == 3: + fmt = CurrentFormat() + assert fmt is not None + return fmt + + def write_multiscale( pyramid: ListOfArrayLike, group: zarr.Group, chunks: tuple[Any, ...] | int | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, coordinate_transformations: list[list[dict[str, Any]]] | None = None, storage_options: JSONDict | list[JSONDict] | None = None, @@ -190,7 +217,7 @@ def write_multiscale( :param pyramid: The image data to save. Largest level first. All image arrays MUST be up to 5-dimensional with dimensions ordered (t, c, z, y, x) - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to store the data in :type chunks: int or tuple of ints, optional :param chunks: @@ -226,6 +253,7 @@ def write_multiscale( :class:`dask.delayed.Delayed` representing the value to be computed by dask. """ + fmt = check_format(group, fmt) dims = len(pyramid[0].shape) axes = _get_valid_axes(dims, axes, fmt) dask_delayed = [] @@ -242,30 +270,62 @@ def write_multiscale( # (which might have been changed for versions 0.1 or 0.2) # if chunks are explicitly set in the storage options chunks_opt = options.pop("chunks", chunks) - # switch to this code in 0.5 - # chunks_opt = options.pop("chunks", None) if chunks_opt is not None: chunks_opt = _retuple(chunks_opt, data.shape) + options["chunk_key_encoding"] = fmt.chunk_key_encoding + zarr_format = fmt.zarr_format + compressor = options.pop("compressor", None) + if zarr_format == 2: + # by default we use Blosc with zstd compression + # Don't need this for zarr v3 as it has a default compressor + if compressor is None: + compressor = _blosc_compressor() + options["compressor"] = compressor + else: + if compressor is not None: + options["compressors"] = [compressor] + if axes is not None: + # the array zarr.json also contains axes names + # TODO: check if this is written by da.to_zarr + options["dimension_names"] = [ + axis["name"] for axis in axes if isinstance(axis, dict) + ] + if isinstance(data, da.Array): + if zarr_format == 2: + options["dimension_separator"] = "/" + del options["chunk_key_encoding"] + # handle any 'chunks' option from storage_options if chunks_opt is not None: data = da.array(data).rechunk(chunks=chunks_opt) - options["chunks"] = chunks_opt da_delayed = da.to_zarr( arr=data, url=group.store, component=str(Path(group.path, str(path))), - storage_options=options, - compressor=options.get("compressor", zarr.storage.default_compressor), - dimension_separator=group._store._dimension_separator, + # IF we pass storage_options then dask NEEDS url to be a string + storage_options=None, compute=compute, + zarr_format=zarr_format, + **options, ) if not compute: dask_delayed.append(da_delayed) else: - group.create_dataset(str(path), data=data, chunks=chunks_opt, **options) + if chunks_opt is not None: + options["chunks"] = chunks_opt + options["shape"] = data.shape + # otherwise we get 'null' + options["fill_value"] = 0 + + arr = group.create_array( + str(path), + dtype=data.dtype, + **options, + ) + arr[slice(None)] = data 
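+            # NOTE: with zarr v3, group.create_array() plus a single whole-array
+            # assignment replaces the zarr v2 group.create_dataset(data=...) call;
+            # 'data' is an in-memory numpy array at this point, so writing it in
+            # one go is fine.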
datasets.append({"path": str(path)}) @@ -297,7 +357,7 @@ def write_multiscale( def write_multiscales_metadata( group: zarr.Group, datasets: list[dict], - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, name: str | None = None, **metadata: str | JSONDict | list[JSONDict], @@ -305,7 +365,7 @@ def write_multiscales_metadata( """ Write the multiscales metadata in the group. - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type datasets: list of dicts :param datasets: @@ -322,6 +382,7 @@ def write_multiscales_metadata( Ignored for versions 0.1 and 0.2. Required for version 0.3 or greater. """ + fmt = check_format(group, fmt) ndim = -1 if axes is not None: if fmt.version in ("0.1", "0.2"): @@ -337,7 +398,7 @@ def write_multiscales_metadata( and isinstance(metadata["metadata"], dict) and "omero" in metadata["metadata"] ): - omero_metadata = metadata["metadata"].get("omero") + omero_metadata = metadata["metadata"].pop("omero") if omero_metadata is None: raise KeyError("If `'omero'` is present, value cannot be `None`.") for c in omero_metadata["channels"]: @@ -353,23 +414,26 @@ def write_multiscales_metadata( if not isinstance(c["window"][p], (int, float)): raise TypeError(f"`'{p}'` must be an int or float.") - group.attrs["omero"] = omero_metadata + add_metadata(group, {"omero": omero_metadata}) # note: we construct the multiscale metadata via dict(), rather than {} # to avoid duplication of protected keys like 'version' in **metadata # (for {} this would silently over-write it, with dict() it explicitly fails) multiscales = [ - dict( - version=fmt.version, - datasets=_validate_datasets(datasets, ndim, fmt), - name=name or group.name, - **metadata, - ) + dict(datasets=_validate_datasets(datasets, ndim, fmt), name=name or group.name) ] + if len(metadata.get("metadata", {})) > 0: + multiscales[0]["metadata"] = metadata["metadata"] if axes is not None: multiscales[0]["axes"] = axes - group.attrs["multiscales"] = multiscales + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + multiscales[0]["version"] = fmt.version + else: + # Zarr v3 top-level version + add_metadata(group, {"version": fmt.version}) + + add_metadata(group, {"multiscales": multiscales}) def write_plate_metadata( @@ -377,7 +441,7 @@ def write_plate_metadata( rows: list[str], columns: list[str], wells: list[str | dict], - fmt: Format = CurrentFormat(), + fmt: Format | None = None, acquisitions: list[dict] | None = None, field_count: int | None = None, name: str | None = None, @@ -385,7 +449,7 @@ def write_plate_metadata( """ Write the plate metadata in the group. - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type rows: list of str :param rows: The list of names for the plate rows. @@ -405,11 +469,11 @@ def write_plate_metadata( :param field_count: The maximum number of fields per view across wells. 
""" + fmt = check_format(group, fmt) plate: dict[str, str | int | list[dict]] = { "columns": _validate_plate_rows_columns(columns), "rows": _validate_plate_rows_columns(rows), "wells": _validate_plate_wells(wells, rows, columns, fmt=fmt), - "version": fmt.version, } if name is not None: plate["name"] = name @@ -417,18 +481,24 @@ def write_plate_metadata( plate["field_count"] = field_count if acquisitions is not None: plate["acquisitions"] = _validate_plate_acquisitions(acquisitions) - group.attrs["plate"] = plate + + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + plate["version"] = fmt.version + group.attrs["plate"] = plate + else: + # Zarr v3 metadata under 'ome' with top-level version + group.attrs["ome"] = {"version": fmt.version, "plate": plate} def write_well_metadata( group: zarr.Group, images: list[str | dict], - fmt: Format = CurrentFormat(), + fmt: Format | None = None, ) -> None: """ Write the well metadata in the group. - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type images: list of dict :param images: The list of dictionaries for all fields of views. @@ -438,11 +508,17 @@ def write_well_metadata( Defaults to the most current. """ - well = { + fmt = check_format(group, fmt) + well: dict[str, Any] = { "images": _validate_well_images(images), - "version": fmt.version, } - group.attrs["well"] = well + + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + well["version"] = fmt.version + group.attrs["well"] = well + else: + # Zarr v3 metadata under 'ome' with top-level version + group.attrs["ome"] = {"version": fmt.version, "well": well} def write_image( @@ -450,7 +526,7 @@ def write_image( group: zarr.Group, scaler: Scaler = Scaler(), chunks: tuple[Any, ...] | int | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, coordinate_transformations: list[list[dict[str, Any]]] | None = None, storage_options: JSONDict | list[JSONDict] | None = None, @@ -465,7 +541,7 @@ def write_image( if the scaler argument is non-None. Image array MUST be up to 5-dimensional with dimensions ordered (t, c, z, y, x). Image can be a numpy or dask Array. - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type scaler: :class:`ome_zarr.scale.Scaler` :param scaler: @@ -504,8 +580,11 @@ def write_image( :class:`dask.delayed.Delayed` representing the value to be computed by dask. """ + fmt = check_format(group, fmt) dask_delayed_jobs = [] + name = metadata.pop("name", None) + name = str(name) if name is not None else None if isinstance(image, da.Array): dask_delayed_jobs = _write_dask_image( image, @@ -516,7 +595,7 @@ def write_image( axes=axes, coordinate_transformations=coordinate_transformations, storage_options=storage_options, - name=None, + name=name, compute=compute, **metadata, ) @@ -530,7 +609,7 @@ def write_image( axes=axes, coordinate_transformations=coordinate_transformations, storage_options=storage_options, - name=None, + name=name, compute=compute, **metadata, ) @@ -556,7 +635,7 @@ def _write_dask_image( group: zarr.Group, scaler: Scaler = Scaler(), chunks: tuple[Any, ...] 
| int | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, coordinate_transformations: list[list[dict[str, Any]]] | None = None, storage_options: JSONDict | list[JSONDict] | None = None, @@ -564,6 +643,7 @@ def _write_dask_image( compute: bool | None = True, **metadata: str | JSONDict | list[JSONDict], ) -> list: + fmt = check_format(group, fmt) if fmt.version in ("0.1", "0.2"): # v0.1 and v0.2 are strictly 5D shape_5d: tuple[Any, ...] = (*(1,) * (5 - image.ndim), *image.shape) @@ -601,23 +681,44 @@ def _write_dask_image( # chunks_opt = options.pop("chunks", None) if chunks_opt is not None: chunks_opt = _retuple(chunks_opt, image.shape) + # image.chunks will be used by da.to_zarr image = da.array(image).rechunk(chunks=chunks_opt) - options["chunks"] = chunks_opt LOGGER.debug("chunks_opt: %s", chunks_opt) shapes.append(image.shape) LOGGER.debug( "write dask.array to_zarr shape: %s, dtype: %s", image.shape, image.dtype ) + kwargs: dict[str, Any] = {} + zarr_format = fmt.zarr_format + if zarr_format == 2: + kwargs["dimension_separator"] = "/" + kwargs["compressor"] = options.pop("compressor", _blosc_compressor()) + else: + kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding + if axes is not None: + kwargs["dimension_names"] = [ + a["name"] for a in axes if isinstance(a, dict) + ] + if "compressor" in options: + # We use 'compressors' for group.create_array() but da.to_zarr() below uses + # zarr.create() which doesn't support 'compressors' + # TypeError: AsyncArray._create() got an unexpected keyword argument 'compressors' + # kwargs["compressors"] = [options.pop("compressor", _blosc_compressor())] + + # ValueError: compressor cannot be used for arrays with zarr_format 3. + # Use bytes-to-bytes codecs instead. + kwargs["compressor"] = options.pop("compressor") + delayed.append( da.to_zarr( arr=image, url=group.store, component=str(Path(group.path, str(path))), - storage_options=options, + # storage_options=options, compute=False, - compressor=options.get("compressor", zarr.storage.default_compressor), - dimension_separator=group._store._dimension_separator, + zarr_format=zarr_format, + **kwargs, ) ) datasets.append({"path": str(path)}) @@ -655,7 +756,7 @@ def write_label_metadata( name: str, colors: list[JSONDict] | None = None, properties: list[JSONDict] | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, **metadata: list[JSONDict] | JSONDict | str, ) -> None: """ @@ -664,7 +765,7 @@ def write_label_metadata( The label data must have been written to a sub-group, with the same name as the second argument. - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type name: str :param name: The name of the label sub-group. @@ -684,6 +785,7 @@ def write_label_metadata( The format of the ome_zarr data which should be used. Defaults to the most current. 
""" + fmt = check_format(group, fmt) label_group = group[name] image_label_metadata = {**metadata} if colors is not None: @@ -691,11 +793,47 @@ def write_label_metadata( if properties is not None: image_label_metadata["properties"] = properties image_label_metadata["version"] = fmt.version - label_group.attrs["image-label"] = image_label_metadata - label_list = group.attrs.get("labels", []) + label_list = get_metadata(group).get("labels", []) label_list.append(name) - group.attrs["labels"] = label_list + + add_metadata(group, {"labels": label_list}, fmt=fmt) + add_metadata(label_group, {"image-label": image_label_metadata}, fmt=fmt) + + +def get_metadata(group: zarr.Group, fmt: Format | None = None) -> dict: + fmt = check_format(group, fmt) + attrs = group.attrs + if fmt.version not in ("0.1", "0.2", "0.3", "0.4"): + attrs = attrs.get("ome", {}) + else: + attrs = dict(attrs) + return attrs + + +def add_metadata( + group: zarr.Group, metadata: JSONDict, fmt: Format | None = None +) -> None: + + fmt = check_format(group, fmt) + + attrs = group.attrs + if fmt.version not in ("0.1", "0.2", "0.3", "0.4"): + attrs = attrs.get("ome", {}) + + for key, value in metadata.items(): + # merge dicts... + if isinstance(value, dict) and isinstance(attrs.get(key), dict): + attrs[key].update(value) + else: + attrs[key] = value + + if fmt.version in ("0.1", "0.2", "0.3", "0.4"): + for key, value in attrs.items(): + group.attrs[key] = value + else: + # Zarr v3 metadata under 'ome' with top-level version + group.attrs["ome"] = attrs def write_multiscale_labels( @@ -703,7 +841,7 @@ def write_multiscale_labels( group: zarr.Group, name: str, chunks: tuple[Any, ...] | int | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, coordinate_transformations: list[list[dict[str, Any]]] | None = None, storage_options: JSONDict | list[JSONDict] | None = None, @@ -722,7 +860,7 @@ def write_multiscale_labels( the image label data to save. Largest level first All image arrays MUST be up to 5-dimensional with dimensions ordered (t, c, z, y, x) - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type name: str, optional :param name: The name of this labels data. @@ -762,6 +900,7 @@ def write_multiscale_labels( :class:`dask.delayed.Delayed` representing the value to be computed by dask. """ + fmt = check_format(group, fmt) sub_group = group.require_group(f"labels/{name}") dask_delayed_jobs = write_multiscale( pyramid, @@ -791,7 +930,7 @@ def write_labels( name: str, scaler: Scaler = Scaler(), chunks: tuple[Any, ...] | int | None = None, - fmt: Format = CurrentFormat(), + fmt: Format | None = None, axes: AxesType = None, coordinate_transformations: list[list[dict[str, Any]]] | None = None, storage_options: JSONDict | list[JSONDict] | None = None, @@ -811,7 +950,7 @@ def write_labels( if the scaler argument is non-None. Label array MUST be up to 5-dimensional with dimensions ordered (t, c, z, y, x) - :type group: :class:`zarr.hierarchy.Group` + :type group: :class:`zarr.Group` :param group: The group within the zarr store to write the metadata in. :type name: str, optional :param name: The name of this labels data. @@ -855,6 +994,7 @@ def write_labels( :class:`dask.delayed.Delayed` representing the value to be computed by dask. 
""" + fmt = check_format(group, fmt) sub_group = group.require_group(f"labels/{name}") dask_delayed_jobs = [] diff --git a/pyproject.toml b/pyproject.toml index 72d8f0bf..3a5e40a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ requires-python = ">3.10" dependencies = [ "numpy", "dask", - "zarr>=2.8.1,<3", + "zarr>=v3.0.0", "fsspec[s3]>=0.8,!=2021.07.0,!=2023.9.0", # See https://github.com/fsspec/filesystem_spec/issues/819 "aiohttp<4", diff --git a/tests/data/v2/0/.zarray b/tests/data/v2/0/.zarray index 705b3f46..c01d65ed 100644 --- a/tests/data/v2/0/.zarray +++ b/tests/data/v2/0/.zarray @@ -13,6 +13,7 @@ "id": "blosc", "shuffle": 1 }, + "dimension_separator": "/", "dtype": "|u1", "fill_value": 0, "filters": null, diff --git a/tests/test_cli.py b/tests/test_cli.py index ca7e692b..aacc4e6a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,7 +6,9 @@ import zarr from ome_zarr.cli import main -from ome_zarr.utils import finder, strip_common_prefix, view +from ome_zarr.format import CurrentFormat, FormatV04, FormatV05 +from ome_zarr.io import parse_url +from ome_zarr.utils import find_multiscales, finder, strip_common_prefix, view from ome_zarr.writer import write_plate_metadata @@ -31,44 +33,89 @@ def s3_address(self, request): "0.1": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr", "0.2": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.2/6001240.zarr", "0.3": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.3/9836842.zarr", + "0.4": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr", + "0.5": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr", } return urls[request.param] - def test_coins_info(self): + @pytest.mark.parametrize( + "fmt", + ( + pytest.param(FormatV04(), id="V04"), + pytest.param(FormatV05(), id="V05"), + pytest.param(None, id="CurrentFormat"), + ), + ) + def test_coins_info(self, capsys, fmt): + """Test create and info with various formats.""" filename = str(self.path) + "-1" - main(["create", "--method=coins", filename]) + args = ["create", "--method=coins", filename] + if fmt: + args += ["--format", fmt.version] + main(args) main(["info", filename]) + out, err = capsys.readouterr() + print("Captured output:", out) + assert os.path.join("labels", "coins") in out + version = fmt.version if fmt else CurrentFormat().version + assert f"- version: {version}" in out def test_astronaut_info(self): filename = str(self.path) + "-2" main(["create", "--method=astronaut", filename]) main(["info", filename]) - def test_astronaut_download(self, tmpdir): + @pytest.mark.parametrize( + "fmt", + ( + pytest.param(FormatV04(), id="V04"), + pytest.param(FormatV05(), id="V05"), + pytest.param(None, id="CurrentFormat"), + ), + ) + def test_astronaut_download(self, tmpdir, fmt): out = str(tmpdir / "out") filename = str(self.path) + "-3" basename = os.path.split(filename)[-1] - main(["create", "--method=astronaut", filename]) + args = ["create", "--method=astronaut", filename] + if fmt: + args += ["--format", fmt.version] + main(args) main(["download", filename, f"--output={out}"]) main(["info", f"{out}/{basename}"]) - assert directory_items(Path(out) / "data-3") == [ - Path(".zattrs"), - Path(".zgroup"), - Path("0"), - Path("1"), - Path("2"), - Path("3"), - Path("4"), - Path("labels"), - ] - - assert directory_items(Path(out) / "data-3" / "1") == [ - Path(".zarray"), - Path("0"), - Path("1"), - Path("2"), - ] + if fmt is not None and fmt.zarr_format == 2: + assert directory_items(Path(out) / "data-3") == [ + 
Path(".zattrs"), + Path(".zgroup"), + Path("0"), + Path("1"), + Path("2"), + Path("3"), + Path("4"), + Path("labels"), + ] + assert directory_items(Path(out) / "data-3" / "1") == [ + Path(".zarray"), + Path(".zattrs"), # empty '{}' + Path("0"), + Path("1"), + Path("2"), + ] + else: + assert directory_items(Path(out) / "data-3") == [ + Path("0"), + Path("1"), + Path("2"), + Path("3"), + Path("4"), + Path("labels"), + Path("zarr.json"), + ] + assert directory_items(Path(out) / "data-3" / "1") == [ + Path("c"), + Path("zarr.json"), + ] def test_s3_info(self, s3_address): main(["info", s3_address]) @@ -113,12 +160,37 @@ def test_view(self): # we need dry_run to be True to avoid blocking the test with server view(filename, 8000, True) - def test_finder(self): + @pytest.mark.parametrize( + "fmt", + (pytest.param(FormatV04(), id="V04"), pytest.param(FormatV05(), id="V05")), + ) + def test_finder(self, fmt): img_dir = (self.path / "images").mkdir() + + # test with empty directory - for code coverage + finder(img_dir, 8000, True) + assert not (img_dir / "biofile_finder.csv").exists() + img_dir2 = (img_dir / "dir2").mkdir() bf2raw_dir = (img_dir / "bf2raw.zarr").mkdir() - main(["create", "--method=astronaut", (str(img_dir / "astronaut"))]) - main(["create", "--method=coins", (str(img_dir2 / "coins"))]) + main( + [ + "create", + "--method=astronaut", + (str(img_dir / "astronaut")), + "--format", + fmt.version, + ] + ) + main( + [ + "create", + "--method=coins", + (str(img_dir2 / "coins")), + "--format", + fmt.version, + ] + ) (bf2raw_dir / "OME").mkdir() # write minimal bioformats2raw and xml metadata @@ -132,8 +204,8 @@ def test_finder(self): ) # create a plate - plate_dir = (img_dir2 / "plate").mkdir() - store = zarr.DirectoryStore(str(plate_dir)) + plate_path = Path(img_dir2.mkdir("plate")) + store = parse_url(plate_path, mode="w", fmt=fmt).store root = zarr.group(store=store) write_plate_metadata(root, ["A"], ["1"], ["A/1"]) @@ -146,3 +218,8 @@ def test_finder(self): assert "dir2/plate/A/1/0,plate,dir2" in csv_text assert "coins,dir2" in csv_text assert "test.fake" in csv_text + + def test_find_multiscales(self): + # for code coverage... 
+ empty_dir = (self.path / "find_multiscales").mkdir() + assert len(find_multiscales(empty_dir)) == 0 diff --git a/tests/test_io.py b/tests/test_io.py index 94b1900a..83a7a355 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,11 +1,12 @@ from pathlib import Path -import fsspec import pytest import zarr +from zarr.storage import LocalStore from ome_zarr.data import create_zarr from ome_zarr.io import ZarrLocation, parse_url +from ome_zarr.writer import add_metadata, get_metadata class TestIO: @@ -13,14 +14,14 @@ class TestIO: def initdir(self, tmpdir): self.path = tmpdir.mkdir("data") create_zarr(str(self.path)) - self.store = parse_url(str(self.path), mode="w").store - self.root = zarr.group(store=self.store) + self.store = parse_url(str(self.path), mode="r").store + self.root = zarr.open_group(store=self.store, mode="r") def test_parse_url(self): assert parse_url(str(self.path)) def test_parse_nonexistent_url(self): - assert parse_url(self.path + "/does-not-exist") is None + assert parse_url(str(self.path + "/does-not-exist")) is None def test_loc_str(self): assert ZarrLocation(str(self.path)) @@ -32,7 +33,22 @@ def test_loc_store(self): assert ZarrLocation(self.store) def test_loc_fs(self): - fs = fsspec.filesystem("memory") - fsstore = zarr.storage.FSStore(url="/", fs=fs) - loc = ZarrLocation(fsstore) + store = LocalStore(str(self.path)) + loc = ZarrLocation(store) assert loc + + def test_no_overwrite(self): + print("self.path:", self.path) + + assert self.root.attrs.get("ome") is not None + # Test that we can open a store to write, without + # overwriting existing data + new_store = parse_url(str(self.path), mode="w").store + new_root = zarr.open_group(store=new_store) + add_metadata(new_root, {"extra": "test_no_overwrite"}) + # read... 
+ read_store = parse_url(str(self.path)).store + read_root = zarr.open_group(store=read_store, mode="r") + attrs = get_metadata(read_root) + assert attrs.get("extra") == "test_no_overwrite" + assert attrs.get("multiscales") is not None diff --git a/tests/test_node.py b/tests/test_node.py index a538c7c7..fc613b14 100644 --- a/tests/test_node.py +++ b/tests/test_node.py @@ -3,7 +3,7 @@ from numpy import zeros from ome_zarr.data import create_zarr -from ome_zarr.format import FormatV01, FormatV02, FormatV03 +from ome_zarr.format import FormatV01, FormatV02, FormatV03, FormatV04 from ome_zarr.io import parse_url from ome_zarr.reader import Label, Labels, Multiscales, Node, Plate, Well from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata @@ -44,16 +44,16 @@ class TestHCSNode: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = tmpdir.mkdir("data") - self.store = parse_url(str(self.path), mode="w").store + self.store = parse_url(str(self.path), mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) def test_minimal_plate(self): - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"]) + write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], fmt=FormatV01()) row_group = self.root.require_group("A") well = row_group.require_group("1") - write_well_metadata(well, ["0"]) + write_well_metadata(well, ["0"], fmt=FormatV04()) image = well.require_group("0") - write_image(zeros((1, 1, 1, 256, 256)), image) + write_image(zeros((1, 1, 1, 256, 256)), image, fmt=FormatV01()) node = Node(parse_url(str(self.path)), list()) assert node.data @@ -85,7 +85,7 @@ def test_multiwells_plate(self, fmt): write_well_metadata(well, ["0", "1", "2"], fmt=fmt) for field in range(3): image = well.require_group(str(field)) - write_image(zeros((1, 1, 1, 256, 256)), image) + write_image(zeros((1, 1, 1, 256, 256)), image, fmt=fmt) node = Node(parse_url(str(self.path)), list()) assert node.data diff --git a/tests/test_reader.py b/tests/test_reader.py index 86188a0e..3aeda912 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -5,9 +5,16 @@ from numpy import ones, zeros from ome_zarr.data import create_zarr +from ome_zarr.format import FormatV04 from ome_zarr.io import parse_url from ome_zarr.reader import Node, Plate, Reader, Well -from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata +from ome_zarr.writer import ( + add_metadata, + get_metadata, + write_image, + write_plate_metadata, + write_well_metadata, +) class TestReader: @@ -42,6 +49,39 @@ def test_omero(self): assert isinstance(omero["channels"], list) assert len(omero["channels"]) == 1 + def test_read_v05(self): + rng = np.random.default_rng(0) + data = rng.poisson(lam=10, size=(10, 128, 128)).astype(np.uint8) + img_path = str(self.path / "test_read_v05.zarr") + root = zarr.group(img_path) + arr = root.create_array( + name="s0", shape=data.shape, chunks=(10, 10, 10), dtype=data.dtype + ) + arr[:, :] = data + root.attrs["ome"] = { + "version": "0.5", + "multiscales": [ + { + "datasets": [ + { + "path": "s0", + "coordinateTransformations": [ + { + "type": "scale", + "scale": [1, 1, 1], + } + ], + } + ] + } + ], + } + reader = Reader(parse_url(img_path)) + nodes = list(reader()) + assert len(nodes) == 1 + image_node = nodes[0] + assert np.allclose(data, image_node.data[0]) + class TestInvalid: @pytest.fixture(autouse=True) @@ -51,9 +91,9 @@ def initdir(self, tmpdir): def test_invalid_version(self): grp = create_zarr(str(self.path)) # update version to something invalid - 
attrs = grp.attrs.asdict() + attrs = get_metadata(grp) attrs["multiscales"][0]["version"] = "invalid" - grp.attrs.put(attrs) + add_metadata(grp, attrs) # should raise exception with pytest.raises(ValueError) as exe: reader = Reader(parse_url(str(self.path))) @@ -65,7 +105,7 @@ class TestHCSReader: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = tmpdir.mkdir("data") - self.store = parse_url(str(self.path), mode="w").store + self.store = parse_url(str(self.path), mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) def test_minimal_plate(self): diff --git a/tests/test_scaler.py b/tests/test_scaler.py index 93ddc726..c3ab1759 100644 --- a/tests/test_scaler.py +++ b/tests/test_scaler.py @@ -145,4 +145,4 @@ def test_big_dask_pyramid(self, tmpdir): print("level_1", level_1) # to zarr invokes compute data_dir = tmpdir.mkdir("test_big_dask_pyramid") - da.to_zarr(level_1, data_dir) + da.to_zarr(level_1, str(data_dir)) diff --git a/tests/test_writer.py b/tests/test_writer.py index a396ca6e..5461025f 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1,6 +1,6 @@ import filecmp +import json import pathlib -from tempfile import TemporaryDirectory from typing import Any import dask.array as da @@ -9,8 +9,17 @@ import zarr from dask import persist from numcodecs import Blosc - -from ome_zarr.format import CurrentFormat, FormatV01, FormatV02, FormatV03, FormatV04 +from zarr.abc.codec import BytesBytesCodec +from zarr.codecs import BloscCodec + +from ome_zarr.format import ( + CurrentFormat, + FormatV01, + FormatV02, + FormatV03, + FormatV04, + FormatV05, +) from ome_zarr.io import ZarrLocation, parse_url from ome_zarr.reader import Multiscales, Reader from ome_zarr.scale import Scaler @@ -39,10 +48,17 @@ class TestWriter: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = pathlib.Path(tmpdir.mkdir("data")) - self.store = parse_url(self.path, mode="w").store + # create zarr v2 group... + self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) self.group = self.root.create_group("test") + # let's create zarr v3 group too... 
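+ # (parse_url with no fmt uses the current format, so this store is zarr v3)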
+ self.path_v3 = self.path / "v3" + store_v3 = parse_url(self.path_v3, mode="w").store + root_v3 = zarr.group(store=store_v3) + self.group_v3 = root_v3.create_group("test") + def create_data(self, shape, dtype=np.uint8, mean_val=10): rng = np.random.default_rng(0) return rng.poisson(mean_val, size=shape).astype(dtype) @@ -72,6 +88,7 @@ def scaler(self, request): pytest.param(FormatV02, id="V02"), pytest.param(FormatV03, id="V03"), pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), ), ) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) @@ -79,9 +96,17 @@ def scaler(self, request): def test_writer( self, shape, scaler, format_version, array_constructor, storage_options_list ): + version = format_version() + + if version.version == "0.5": + group = self.group_v3 + grp_path = self.path_v3 / "test" + else: + group = self.group + grp_path = self.path / "test" + data = self.create_data(shape) data = array_constructor(data) - version = format_version() axes = "tczyx"[-len(shape) :] transformations = [] for dataset_transfs in TRANSFORMATIONS: @@ -98,7 +123,7 @@ def test_writer( storage_options = [{"chunks": chunk} for chunk in chunks] write_image( image=data, - group=self.group, + group=group, scaler=scaler, fmt=version, axes=axes, @@ -107,7 +132,7 @@ def test_writer( ) # Verify - reader = Reader(parse_url(f"{self.path}/test")) + reader = Reader(parse_url(f"{grp_path}")) node = next(iter(reader())) assert Multiscales.matches(node.zarr) if version.version in ("0.1", "0.2"): @@ -129,13 +154,47 @@ def test_writer( assert tuple(first_chunk) == _retuple(expected, nd_array.shape) assert np.allclose(data, node.data[0][...].compute()) + def test_mix_zarr_formats(self): + # check group zarr v2 and v3 matches fmt + data = self.create_data((64, 64, 64)) + with pytest.raises(ValueError, match=r"Group is zarr_format: 2"): + write_image(data, self.group, axes="zyx", fmt=CurrentFormat()) + + with pytest.raises(ValueError, match=r"Group is zarr_format: 3"): + write_multiscale([data], self.group_v3, fmt=FormatV04()) + + with pytest.raises(ValueError, match=r"Group is zarr_format: 3"): + write_plate_metadata(self.group_v3, ["A"], ["1"], ["A/1"], fmt=FormatV04()) + + with pytest.raises(ValueError, match=r"Group is zarr_format: 2"): + write_well_metadata(self.group, [{"path": "0"}], fmt=CurrentFormat()) + + @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) - def test_write_image_current(self, array_constructor): + def test_write_image_current(self, array_constructor, zarr_format): shape = (64, 64, 64) data = self.create_data(shape) data = array_constructor(data) - write_image(data, self.group, axes="zyx") - reader = Reader(parse_url(f"{self.path}/test")) + + if zarr_format == 2: + group = self.group + grp_path = self.path / "test" + else: + group = self.group_v3 + grp_path = self.path_v3 / "test" + + write_image(data, group, axes="zyx") + reader = Reader(parse_url(f"{grp_path}")) + + # manually check this is zarr v2 or v3 + if zarr_format == 2: + json_text = (grp_path / ".zattrs").read_text(encoding="utf-8") + attrs_json = json.loads(json_text) + else: + json_text = (grp_path / "zarr.json").read_text(encoding="utf-8") + attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {}) + assert "multiscales" in attrs_json + image_node = next(iter(reader())) for transfs in image_node.metadata["coordinateTransformations"]: assert len(transfs) == 1 @@ -148,34 +207,61 @@ def 
test_write_image_current(self, array_constructor): @pytest.mark.parametrize("read_from_zarr", [True, False]) @pytest.mark.parametrize("compute", [True, False]) - def test_write_image_dask(self, read_from_zarr, compute): + @pytest.mark.parametrize("zarr_format", [2, 3]) + def test_write_image_dask(self, read_from_zarr, compute, zarr_format): + if zarr_format == 2: + grp_path = self.path / "test" + fmt = FormatV04() + zarr_attrs = ".zattrs" + zarr_array = ".zarray" + group = self.group + else: + grp_path = self.path_v3 / "test" + fmt = CurrentFormat() + zarr_attrs = "zarr.json" + zarr_array = "zarr.json" + group = self.group_v3 + # Size 100 tests resize shapes: https://github.com/ome/ome-zarr-py/issues/219 shape = (128, 200, 200) data = self.create_data(shape) data_delayed = da.from_array(data) chunks = (32, 32) - opts = {"chunks": chunks, "compressor": None} + # same NAME needed for exact zarr_attrs match below + # (otherwise group.name is used) + NAME = "test_write_image_dask" + opts = {"chunks": chunks} if read_from_zarr: # write to zarr and re-read as dask... - path = f"{self.path}/temp/" - store = parse_url(path, mode="w").store - temp_group = zarr.group(store=store).create_group("test") - write_image(data, temp_group, axes="zyx", storage_options=opts) - loc = ZarrLocation(f"{self.path}/temp/test") + path = f"{grp_path}/temp/" + store = parse_url(path, mode="w", fmt=fmt).store + # store and group will be zarr v2 or v3 depending on fmt + temp_group = zarr.group(store=store).create_group("to_dask") + assert temp_group.info._zarr_format == zarr_format + write_image( + data_delayed, + temp_group, + axes="zyx", + storage_options=opts, + name=NAME, + ) + print("PATH", f"{grp_path}/temp/to_dask") + loc = ZarrLocation(f"{grp_path}/temp/to_dask") + reader = Reader(loc)() nodes = list(reader) - data_delayed = ( - nodes[0] - .load(Multiscales) - .array(resolution="0", version=CurrentFormat().version) - ) + data_delayed = nodes[0].load(Multiscales).array(resolution="0") + # check that the data is the same + assert np.allclose(data, data_delayed[...].compute()) + assert group.info._zarr_format == zarr_format dask_delayed_jobs = write_image( data_delayed, - self.group, + group, axes="zyx", - storage_options={"chunks": chunks, "compressor": None}, + storage_options={"chunks": chunks}, compute=compute, + name=NAME, ) assert not compute == len(dask_delayed_jobs) @@ -185,7 +271,8 @@ def test_write_image_dask(self, read_from_zarr, compute): # before persisting the jobs dask_delayed_jobs = persist(*dask_delayed_jobs) - reader = Reader(parse_url(f"{self.path}/test")) + # check the data written to zarr v2 or v3 group + reader = Reader(parse_url(f"{grp_path}")) image_node = next(iter(reader())) first_chunk = [c[0] for c in image_node.data[0].chunks] assert tuple(first_chunk) == _retuple(chunks, image_node.data[0].shape) @@ -203,16 +290,16 @@ def test_write_image_dask(self, read_from_zarr, compute): # if shape smaller than chunk, dask writer uses chunk == shape # so we only compare larger resolutions assert filecmp.cmp( - f"{self.path}/temp/test/{level}/.zarray", - f"{self.path}/test/{level}/.zarray", + f"{grp_path}/temp/to_dask/{level}/{zarr_array}", + f"{grp_path}/{level}/{zarr_array}", shallow=False, ) if read_from_zarr: - # .zattrs should be the same + # exact match, including NAME assert filecmp.cmp( - f"{self.path}/temp/test/.zattrs", - f"{self.path}/test/.zattrs", + f"{grp_path}/temp/to_dask/{zarr_attrs}", + f"{grp_path}/{zarr_attrs}", shallow=False, ) @@ -226,29 +313,80 @@ def 
test_write_image_scalar_chunks(self): write_image( image=data, group=self.group, axes="xyz", storage_options={"chunks": 32} ) - for data in self.group.values(): + for data in self.group.array_values(): print(data) assert data.chunks == (32, 32, 32) + @pytest.mark.parametrize( + "format_version", + ( + pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), + ), + ) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) - def test_write_image_compressed(self, array_constructor): + def test_write_image_compressed(self, array_constructor, format_version): shape = (64, 64, 64) data = self.create_data(shape) data = array_constructor(data) - compressor = Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE) + path = self.path / "test_write_image_compressed" + store = parse_url(path, mode="w", fmt=format_version()).store + root = zarr.group(store=store) + CNAME = "lz4" + LEVEL = 4 + if format_version().zarr_format == 3: + compressor = BloscCodec(cname=CNAME, clevel=LEVEL, shuffle="shuffle") + assert isinstance(compressor, BytesBytesCodec) + if isinstance(data, da.Array): + # skip test - can't get this to pass. Fails with: + # ValueError: compressor cannot be used for arrays with zarr_format 3. + # Use bytes-to-bytes codecs instead. + pytest.skip("storage_options['compressor'] fails in da.to_zarr()") + else: + compressor = Blosc(cname=CNAME, clevel=LEVEL, shuffle=Blosc.SHUFFLE) + write_image( + data, + root, + axes="zyx", + storage_options={"compressor": compressor}, ) - group = zarr.open(f"{self.path}/test") - assert group["0"].compressor.get_config() == { - "id": "blosc", - "cname": "zstd", - "clevel": 5, - "shuffle": Blosc.SHUFFLE, - "blocksize": 0, - } - - def test_default_compression(self): + group = zarr.open(f"{path}") + for ds in ["0", "1"]: + assert len(group[ds].info._compressors) > 0 + comp = group[ds].info._compressors[0] + if format_version().zarr_format == 3: + print("comp", comp.to_dict()) + # e.g. {'name': 'blosc', 'configuration': {'cname': CNAME, 'clevel': LEVEL, ...}} + assert comp.to_dict() == { + "name": "blosc", + "configuration": { + "typesize": 1, + "cname": CNAME, + "clevel": LEVEL, + "shuffle": "shuffle", + "blocksize": 0, + }, + } + else: + print("comp", comp.get_config()) + assert comp.get_config() == { + "id": "blosc", + "cname": CNAME, + "clevel": LEVEL, + "shuffle": Blosc.SHUFFLE, + "blocksize": 0, + } + + @pytest.mark.parametrize( + "format_version", + ( + pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), + ), + ) + @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) + def test_default_compression(self, array_constructor, format_version): + """Test that the default compression is not None.
We make an array of zeros which should compress trivially easily, @@ -259,19 +397,56 @@ def test_default_compression(self): # avoid empty chunks so they are guaranteed to be written out to disk arr_np[0, 0, 0, 0] = 1 # 4MB chunks, trivially compressible - arr = da.from_array(arr_np, chunks=(1, 50, 200, 400)) - with TemporaryDirectory(suffix=".ome.zarr") as tempdir: - path = tempdir - store = parse_url(path, mode="w").store - root = zarr.group(store=store) - # no compressor options, we are checking default - write_multiscale([arr], group=root, axes="tzyx") - # check chunk: multiscale level 0, 4D chunk at (0, 0, 0, 0) - chunk_size = (pathlib.Path(path) / "0/0/0/0/0").stat().st_size - assert chunk_size < 4e6 - - def test_validate_coordinate_transforms(self): - fmt = FormatV04() + arr = array_constructor(arr_np) + path = self.path / "test_default_compression" + store = parse_url(path, mode="w", fmt=format_version()).store + root = zarr.group(store=store) + assert root.info._zarr_format == format_version().zarr_format + # no compressor options, we are checking default + write_image( + arr, group=root, axes="tzyx", storage_options=dict(chunks=(1, 100, 100)) + ) + + # check chunk: multiscale level 0, 4D chunk at (0, 0, 0, 0) + c = "" + for ds in ["0", "1"]: + if format_version().zarr_format == 3: + assert (path / "zarr.json").exists() + assert (path / ds / "zarr.json").exists() + c = "c/" + json_text = (path / ds / "zarr.json").read_text(encoding="utf-8") + arr_json = json.loads(json_text) + assert arr_json["codecs"][0]["name"] == "bytes" + assert arr_json["codecs"][1] == { + "name": "zstd", + "configuration": {"level": 0, "checksum": False}, + } + else: + assert (path / ".zattrs").exists() + json_text = (path / ds / ".zarray").read_text(encoding="utf-8") + arr_json = json.loads(json_text) + assert arr_json["compressor"] == { + "blocksize": 0, + "clevel": 5, + "cname": "zstd", + "id": "blosc", + "shuffle": 1, + } + + chunk_size = (path / f"0/{c}0/0/0/0").stat().st_size + assert chunk_size < 4e6 + + @pytest.mark.parametrize( + "format_version", + ( + pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), + ), + ) + def test_validate_coordinate_transforms(self, format_version): + fmt = format_version() transformations = [ [{"type": "scale", "scale": (1, 1)}], @@ -429,17 +604,39 @@ class TestMultiscalesMetadata: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = pathlib.Path(tmpdir.mkdir("data")) - self.store = parse_url(self.path, mode="w").store + # create zarr v2 group... + self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) - def test_multi_levels_transformations(self): + # let's create zarr v3 group too...
+ self.path_v3 = self.path / "v3" + store_v3 = parse_url(self.path_v3, mode="w").store + self.root_v3 = zarr.group(store=store_v3) + + @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05())) + def test_multi_levels_transformations(self, fmt): datasets = [] for level, transf in enumerate(TRANSFORMATIONS): datasets.append({"path": str(level), "coordinateTransformations": transf}) - write_multiscales_metadata(self.root, datasets, axes="tczyx") - assert "multiscales" in self.root.attrs - assert "version" in self.root.attrs["multiscales"][0] - assert self.root.attrs["multiscales"][0]["datasets"] == datasets + if fmt.version == "0.5": + group = self.root_v3 + else: + group = self.root + write_multiscales_metadata(group, datasets, axes="tczyx") + # we want to be sure this is zarr v2 / v3 + attrs = group.attrs + if fmt.version == "0.5": + attrs = attrs.get("ome") + assert "version" in attrs + json_text = (self.path_v3 / "zarr.json").read_text(encoding="utf-8") + attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {}) + else: + json_text = (self.path / ".zattrs").read_text(encoding="utf-8") + attrs_json = json.loads(json_text) + assert "version" in attrs["multiscales"][0] + assert "multiscales" in attrs_json + assert "multiscales" in attrs + assert attrs["multiscales"][0]["datasets"] == datasets @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03())) def test_version(self, fmt): @@ -470,7 +667,7 @@ def test_axes_V03(self, axes): assert self.root.attrs["multiscales"][0]["axes"] == axes with pytest.raises(ValueError): # for v0.4 and above, paths no-longer supported (need dataset dicts) - write_multiscales_metadata(self.root, ["0"], axes=axes) + write_multiscales_metadata(self.root, ["0"], axes=axes, fmt=FormatV04()) @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02())) def test_axes_ignored(self, fmt): @@ -498,7 +695,7 @@ def test_invalid_0_3_axes(self, axes): def test_invalid_datasets(self, datasets): with pytest.raises(ValueError): write_multiscales_metadata( - self.root, datasets, axes=["t", "c", "z", "y", "x"] + self.root, datasets, axes=["t", "c", "z", "y", "x"], fmt=FormatV04() ) @pytest.mark.parametrize( @@ -519,7 +716,7 @@ def test_valid_transformations(self, coordinateTransformations): "coordinateTransformations": coordinateTransformations, } ] - write_multiscales_metadata(self.root, datasets, axes=axes) + write_multiscales_metadata(self.root, datasets, axes=axes, fmt=FormatV04()) assert "multiscales" in self.root.attrs assert self.root.attrs["multiscales"][0]["axes"] == axes assert self.root.attrs["multiscales"][0]["datasets"] == datasets @@ -570,7 +767,7 @@ def test_invalid_transformations(self, coordinateTransformations): {"path": "0", "coordinateTransformations": coordinateTransformations} ] with pytest.raises(ValueError): - write_multiscales_metadata(self.root, datasets, axes=axes) + write_multiscales_metadata(self.root, datasets, axes=axes, fmt=FormatV04()) @pytest.mark.parametrize( "metadata", @@ -603,7 +800,10 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None): KeyError, match="If `'omero'` is present, value cannot be `None`." 
): write_multiscales_metadata( - self.root, datasets, axes="tczyx", metadata={"omero": metadata} + self.root, + datasets, + axes="tczyx", + metadata={"omero": metadata}, ) else: window_metadata = ( @@ -624,6 +824,7 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None): datasets, axes="tczyx", metadata={"omero": metadata}, + fmt=FormatV04(), ) elif isinstance(window_metadata, list): with pytest.raises(TypeError, match=".*`'window'`.*"): @@ -632,6 +833,7 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None): datasets, axes="tczyx", metadata={"omero": metadata}, + fmt=FormatV04(), ) elif color_metadata is not None and len(color_metadata) != 6: with pytest.raises(TypeError, match=".*`'color'`.*"): @@ -643,7 +845,10 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None): ) else: write_multiscales_metadata( - self.root, datasets, axes="tczyx", metadata={"omero": metadata} + self.root, + datasets, + axes="tczyx", + metadata={"omero": metadata}, ) @@ -651,23 +856,40 @@ class TestPlateMetadata: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = pathlib.Path(tmpdir.mkdir("data")) - self.store = parse_url(self.path, mode="w").store + # create zarr v2 group... + self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) + # create zarr v3 group... + self.path_v3 = self.path / "v3" + store_v3 = parse_url(self.path_v3, mode="w").store + self.root_v3 = zarr.group(store=store_v3) + + @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05())) + def test_minimal_plate(self, fmt): + if fmt.version == "0.4": + group = self.root + else: + group = self.root_v3 + write_plate_metadata(group, ["A"], ["1"], ["A/1"]) + attrs = group.attrs + if fmt.version != "0.4": + attrs = attrs["ome"] + assert attrs["version"] == fmt.version + else: + assert attrs["plate"]["version"] == fmt.version - def test_minimal_plate(self): - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"]) - assert "plate" in self.root.attrs - assert self.root.attrs["plate"]["columns"] == [{"name": "1"}] - assert self.root.attrs["plate"]["rows"] == [{"name": "A"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version - assert self.root.attrs["plate"]["wells"] == [ + assert "plate" in attrs + assert attrs["plate"]["columns"] == [{"name": "1"}] + assert attrs["plate"]["rows"] == [{"name": "A"}] + assert attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0} ] - assert "name" not in self.root.attrs["plate"] - assert "field_count" not in self.root.attrs["plate"] - assert "acquisitions" not in self.root.attrs["plate"] + assert "name" not in attrs["plate"] + assert "field_count" not in attrs["plate"] + assert "acquisitions" not in attrs["plate"] - def test_12wells_plate(self): + @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05())) + def test_12wells_plate(self, fmt): rows = ["A", "B", "C", "D"] cols = ["1", "2", "3"] wells = [ @@ -684,21 +906,28 @@ def test_12wells_plate(self): "D/2", "D/3", ] - write_plate_metadata(self.root, rows, cols, wells) - assert "plate" in self.root.attrs - assert self.root.attrs["plate"]["columns"] == [ + if fmt.version == "0.4": + group = self.root + else: + group = self.root_v3 + write_plate_metadata(group, rows, cols, wells) + attrs = group.attrs + if fmt.version != "0.4": + attrs = attrs["ome"] + + assert "plate" in attrs + assert attrs["plate"]["columns"] == [ {"name": "1"}, {"name": "2"}, {"name": "3"}, ] - assert self.root.attrs["plate"]["rows"] == [ + assert attrs["plate"]["rows"]
== [ {"name": "A"}, {"name": "B"}, {"name": "C"}, {"name": "D"}, ] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version - assert self.root.attrs["plate"]["wells"] == [ + assert attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0}, {"path": "A/2", "rowIndex": 0, "columnIndex": 1}, {"path": "A/3", "rowIndex": 0, "columnIndex": 2}, @@ -712,41 +941,48 @@ def test_12wells_plate(self): {"path": "D/2", "rowIndex": 3, "columnIndex": 1}, {"path": "D/3", "rowIndex": 3, "columnIndex": 2}, ] - assert "name" not in self.root.attrs["plate"] - assert "field_count" not in self.root.attrs["plate"] - assert "acquisitions" not in self.root.attrs["plate"] + assert "name" not in attrs["plate"] + assert "field_count" not in attrs["plate"] + assert "acquisitions" not in attrs["plate"] - def test_sparse_plate(self): + @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05())) + def test_sparse_plate(self, fmt): rows = ["A", "B", "C", "D", "E"] cols = ["1", "2", "3", "4", "5"] wells = [ "B/2", "E/5", ] - write_plate_metadata(self.root, rows, cols, wells) - assert "plate" in self.root.attrs - assert self.root.attrs["plate"]["columns"] == [ + if fmt.version == "0.4": + group = self.root + else: + group = self.root_v3 + write_plate_metadata(group, rows, cols, wells) + attrs = group.attrs + if fmt.version != "0.4": + attrs = attrs["ome"] + assert "plate" in attrs + assert attrs["plate"]["columns"] == [ {"name": "1"}, {"name": "2"}, {"name": "3"}, {"name": "4"}, {"name": "5"}, ] - assert self.root.attrs["plate"]["rows"] == [ + assert attrs["plate"]["rows"] == [ {"name": "A"}, {"name": "B"}, {"name": "C"}, {"name": "D"}, {"name": "E"}, ] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version - assert self.root.attrs["plate"]["wells"] == [ + assert attrs["plate"]["wells"] == [ {"path": "B/2", "rowIndex": 1, "columnIndex": 1}, {"path": "E/5", "rowIndex": 4, "columnIndex": 4}, ] - assert "name" not in self.root.attrs["plate"] - assert "field_count" not in self.root.attrs["plate"] - assert "acquisitions" not in self.root.attrs["plate"] + assert "name" not in attrs["plate"] + assert "field_count" not in attrs["plate"] + assert "acquisitions" not in attrs["plate"] @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03())) def test_legacy_wells(self, fmt): @@ -761,25 +997,30 @@ def test_legacy_wells(self, fmt): assert "acquisitions" not in self.root.attrs["plate"] def test_plate_name(self): - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], name="test") - assert "plate" in self.root.attrs - assert self.root.attrs["plate"]["columns"] == [{"name": "1"}] - assert self.root.attrs["plate"]["name"] == "test" - assert self.root.attrs["plate"]["rows"] == [{"name": "A"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version - assert self.root.attrs["plate"]["wells"] == [ + # We don't need to test v04 and v05 for all tests since + # the metadata is the same + write_plate_metadata(self.root_v3, ["A"], ["1"], ["A/1"], name="test") + attrs = self.root_v3.attrs["ome"] + assert "plate" in attrs + assert attrs["plate"]["columns"] == [{"name": "1"}] + assert attrs["plate"]["name"] == "test" + assert attrs["plate"]["rows"] == [{"name": "A"}] + assert attrs["version"] == FormatV05().version + assert attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0} ] - assert "field_count" not in self.root.attrs["plate"] - assert "acquisitions" not in self.root.attrs["plate"] + assert "field_count" not in attrs["plate"] + assert 
"acquisitions" not in attrs["plate"] def test_field_count(self): - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], field_count=10) + write_plate_metadata( + self.root, ["A"], ["1"], ["A/1"], field_count=10, fmt=FormatV04() + ) assert "plate" in self.root.attrs assert self.root.attrs["plate"]["columns"] == [{"name": "1"}] assert self.root.attrs["plate"]["field_count"] == 10 assert self.root.attrs["plate"]["rows"] == [{"name": "A"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version + assert self.root.attrs["plate"]["version"] == FormatV04().version assert self.root.attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0} ] @@ -788,12 +1029,14 @@ def test_field_count(self): def test_acquisitions_minimal(self): a = [{"id": 1}, {"id": 2}, {"id": 3}] - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], acquisitions=a) + write_plate_metadata( + self.root, ["A"], ["1"], ["A/1"], acquisitions=a, fmt=FormatV04() + ) assert "plate" in self.root.attrs assert self.root.attrs["plate"]["acquisitions"] == a assert self.root.attrs["plate"]["columns"] == [{"name": "1"}] assert self.root.attrs["plate"]["rows"] == [{"name": "A"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version + assert self.root.attrs["plate"]["version"] == FormatV04().version assert self.root.attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0} ] @@ -811,12 +1054,14 @@ def test_acquisitions_maximal(self): "endtime": 1343749392000, } ] - write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], acquisitions=a) + write_plate_metadata( + self.root, ["A"], ["1"], ["A/1"], acquisitions=a, fmt=FormatV04() + ) assert "plate" in self.root.attrs assert self.root.attrs["plate"]["acquisitions"] == a assert self.root.attrs["plate"]["columns"] == [{"name": "1"}] assert self.root.attrs["plate"]["rows"] == [{"name": "A"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version + assert self.root.attrs["plate"]["version"] == FormatV04().version assert self.root.attrs["plate"]["wells"] == [ {"path": "A/1", "rowIndex": 0, "columnIndex": 0} ] @@ -834,7 +1079,7 @@ def test_acquisitions_maximal(self): def test_invalid_acquisition_keys(self, acquisitions): with pytest.raises(ValueError): write_plate_metadata( - self.root, ["A"], ["1"], ["A/1"], acquisitions=acquisitions + self.root_v3, ["A"], ["1"], ["A/1"], acquisitions=acquisitions ) def test_unspecified_acquisition_keys(self): @@ -881,7 +1126,7 @@ def test_invalid_well_list(self, wells): ) def test_invalid_well_keys(self, wells): with pytest.raises(ValueError): - write_plate_metadata(self.root, ["A"], ["1"], wells) + write_plate_metadata(self.root, ["A"], ["1"], wells, fmt=FormatV04()) @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03())) def test_legacy_unspecified_well_keys(self, fmt): @@ -913,11 +1158,11 @@ def test_unspecified_well_keys(self): "unspecified_key": "gamma", }, ] - write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells) + write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04()) assert "plate" in self.root.attrs assert self.root.attrs["plate"]["columns"] == [{"name": "1"}, {"name": "2"}] assert self.root.attrs["plate"]["rows"] == [{"name": "A"}, {"name": "B"}] - assert self.root.attrs["plate"]["version"] == CurrentFormat().version + assert self.root.attrs["plate"]["version"] == FormatV04().version assert self.root.attrs["plate"]["wells"] == wells def test_missing_well_keys(self): @@ -927,42 +1172,70 @@ def 
test_missing_well_keys(self): {"path": "B/1"}, ] with pytest.raises(ValueError): - write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells) + write_plate_metadata( + self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04() + ) def test_well_not_in_rows(self): wells = ["A/1", "B/1", "C/1"] with pytest.raises(ValueError): - write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells) + write_plate_metadata( + self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04() + ) def test_well_not_in_columns(self): wells = ["A/1", "A/2", "A/3"] with pytest.raises(ValueError): - write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells) + write_plate_metadata( + self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04() + ) @pytest.mark.parametrize("rows", (["A", "B", "B"], ["A", "&"])) def test_invalid_rows(self, rows): with pytest.raises(ValueError): - write_plate_metadata(self.root, rows, ["1"], ["A/1"]) + write_plate_metadata(self.root, rows, ["1"], ["A/1"], fmt=FormatV04()) @pytest.mark.parametrize("columns", (["1", "2", "2"], ["1", "&"])) def test_invalid_columns(self, columns): with pytest.raises(ValueError): - write_plate_metadata(self.root, ["A"], columns, ["A/1"]) + write_plate_metadata(self.root, ["A"], columns, ["A/1"], fmt=FormatV04()) class TestWellMetadata: @pytest.fixture(autouse=True) def initdir(self, tmpdir): self.path = pathlib.Path(tmpdir.mkdir("data")) - self.store = parse_url(self.path, mode="w").store + # create zarr v2 group... + self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) + # create zarr v3 group too... + self.path_v3 = self.path / "v3" + store_v3 = parse_url(self.path_v3, mode="w").store + self.root_v3 = zarr.group(store=store_v3) + + @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05())) @pytest.mark.parametrize("images", (["0"], [{"path": "0"}])) - def test_minimal_well(self, images): - write_well_metadata(self.root, images) - assert "well" in self.root.attrs - assert self.root.attrs["well"]["images"] == [{"path": "0"}] - assert self.root.attrs["well"]["version"] == CurrentFormat().version + def test_minimal_well(self, images, fmt): + if fmt.version == "0.5": + group = self.root_v3 + else: + group = self.root + write_well_metadata(group, images) + # we want to be sure this is zarr v2 / v3, so we load json manually too + attrs = group.attrs + if fmt.version == "0.5": + attrs = attrs.get("ome") + assert attrs["version"] == fmt.version + json_text = (self.path_v3 / "zarr.json").read_text(encoding="utf-8") + attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {}) + else: + json_text = (self.path / ".zattrs").read_text(encoding="utf-8") + attrs_json = json.loads(json_text) + assert attrs["well"]["version"] == fmt.version + + assert "well" in attrs_json + assert attrs["well"]["images"] == [{"path": "0"}] @pytest.mark.parametrize( "images", @@ -976,14 +1249,14 @@ def test_minimal_well(self, images): ), ) def test_multiple_images(self, images): - write_well_metadata(self.root, images) - assert "well" in self.root.attrs - assert self.root.attrs["well"]["images"] == [ + write_well_metadata(self.root_v3, images) + assert "well" in self.root_v3.attrs.get("ome", {}) + assert self.root_v3.attrs["ome"]["well"]["images"] == [ {"path": "0"}, {"path": "1"}, {"path": "2"}, ] - assert self.root.attrs["well"]["version"] == CurrentFormat().version + assert self.root_v3.attrs["ome"]["version"] == FormatV05().version @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03())) def
test_version(self, fmt): @@ -1001,7 +1274,7 @@ def test_multiple_acquisitions(self): write_well_metadata(self.root, images) assert "well" in self.root.attrs assert self.root.attrs["well"]["images"] == images - assert self.root.attrs["well"]["version"] == CurrentFormat().version + assert self.root.attrs["well"]["version"] == FormatV04().version @pytest.mark.parametrize( "images", @@ -1025,22 +1298,27 @@ def test_unspecified_images_keys(self): write_well_metadata(self.root, images) assert "well" in self.root.attrs assert self.root.attrs["well"]["images"] == images - assert self.root.attrs["well"]["version"] == CurrentFormat().version + assert self.root.attrs["well"]["version"] == FormatV04().version class TestLabelWriter: @pytest.fixture(autouse=True) def initdir(self, tmpdir): - self.path = pathlib.Path(tmpdir.mkdir("data.ome.zarr")) - self.store = parse_url(self.path, mode="w").store + self.path = pathlib.Path(tmpdir.mkdir("data")) + # create zarr v2 group... + self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store self.root = zarr.group(store=self.store) + # create zarr v3 group... + self.path_v3 = self.path / "v3" + store_v3 = parse_url(self.path_v3, mode="w").store + self.root_v3 = zarr.group(store=store_v3) - def create_image_data(self, shape, scaler, fmt, axes, transformations): + def create_image_data(self, group, shape, scaler, fmt, axes, transformations): rng = np.random.default_rng(0) data = rng.poisson(10, size=shape).astype(np.uint8) write_image( image=data, - group=self.root, + group=group, scaler=scaler, fmt=fmt, axes=axes, @@ -1066,9 +1344,11 @@ def scaler(self, request): else: return None - def verify_label_data(self, label_name, label_data, fmt, shape, transformations): + def verify_label_data( + self, img_path, label_name, label_data, fmt, shape, transformations + ): # Verify image data - reader = Reader(parse_url(f"{self.path}/labels/{label_name}")) + reader = Reader(parse_url(f"{img_path}/labels/{label_name}")) node = next(iter(reader())) assert Multiscales.matches(node.zarr) if fmt.version in ("0.1", "0.2"): @@ -1086,16 +1366,24 @@ def verify_label_data(self, label_name, label_data, fmt, shape, transformations) assert np.allclose(label_data, node.data[0][...].compute()) # Verify label metadata - label_root = zarr.open(f"{self.path}/labels", "r") - assert "labels" in label_root.attrs - assert label_name in label_root.attrs["labels"] - - label_group = zarr.open(f"{self.path}/labels/{label_name}", "r") - assert "image-label" in label_group.attrs - assert label_group.attrs["image-label"]["version"] == fmt.version + label_root = zarr.open(f"{img_path}/labels", mode="r") + label_attrs = label_root.attrs + if fmt.version == "0.5": + label_attrs = label_attrs["ome"] + assert "labels" in label_attrs + assert label_name in label_attrs["labels"] + + label_group = zarr.open(f"{img_path}/labels/{label_name}", mode="r") + imglabel_attrs = label_group.attrs + if fmt.version == "0.5": + imglabel_attrs = imglabel_attrs["ome"] + assert imglabel_attrs["version"] == fmt.version + else: + assert imglabel_attrs["image-label"]["version"] == fmt.version + assert "image-label" in imglabel_attrs # Verify multiscale metadata - name = label_group.attrs["multiscales"][0].get("name", "") + name = imglabel_attrs["multiscales"][0].get("name", "") assert label_name == name @pytest.mark.parametrize( @@ -1105,11 +1393,19 @@ def verify_label_data(self, label_name, label_data, fmt, shape, transformations) pytest.param(FormatV02, id="V02"), pytest.param(FormatV03, id="V03"), 
pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), ), ) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) def test_write_labels(self, shape, scaler, format_version, array_constructor): fmt = format_version() + if fmt.version == "0.5": + img_path = self.path_v3 + group = self.root_v3 + else: + img_path = self.path + group = self.root + axes = "tczyx"[-len(shape) :] transformations = [] for dataset_transfs in TRANSFORMATIONS: @@ -1132,18 +1428,20 @@ def test_write_labels(self, shape, scaler, format_version, array_constructor): label_data = array_constructor(label_data) # create the root level image data - self.create_image_data(shape, scaler, fmt, axes, transformations) + self.create_image_data(group, shape, scaler, fmt, axes, transformations) write_labels( label_data, - self.root, + group, scaler=scaler, name=label_name, fmt=fmt, axes=axes, coordinate_transformations=transformations, ) - self.verify_label_data(label_name, label_data, fmt, shape, transformations) + self.verify_label_data( + img_path, label_name, label_data, fmt, shape, transformations + ) @pytest.mark.parametrize( "format_version", @@ -1152,6 +1450,7 @@ def test_write_labels(self, shape, scaler, format_version, array_constructor): pytest.param(FormatV02, id="V02"), pytest.param(FormatV03, id="V03"), pytest.param(FormatV04, id="V04"), + pytest.param(FormatV05, id="V05"), ), ) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) @@ -1159,6 +1458,12 @@ def test_write_multiscale_labels( self, shape, scaler, format_version, array_constructor ): fmt = format_version() + if fmt.version == "0.5": + img_path = self.path_v3 + group = self.root_v3 + else: + img_path = self.path + group = self.root axes = "tczyx"[-len(shape) :] transformations = [] for dataset_transfs in TRANSFORMATIONS: @@ -1185,20 +1490,32 @@ def test_write_multiscale_labels( labels_mip = scaler.nearest(label_data) # create the root level image data - self.create_image_data(shape, scaler, fmt, axes, transformations) + self.create_image_data(group, shape, scaler, fmt, axes, transformations) write_multiscale_labels( labels_mip, - self.root, + group, name=label_name, fmt=fmt, axes=axes, coordinate_transformations=transformations, ) - self.verify_label_data(label_name, label_data, fmt, shape, transformations) + self.verify_label_data( + img_path, label_name, label_data, fmt, shape, transformations + ) + @pytest.mark.parametrize( + "fmt", + (pytest.param(FormatV04(), id="V04"), pytest.param(FormatV05(), id="V05")), + ) @pytest.mark.parametrize("array_constructor", [np.array, da.from_array]) - def test_two_label_images(self, array_constructor): + def test_two_label_images(self, array_constructor, fmt): + if fmt.version == "0.5": + img_path = self.path_v3 + group = self.root_v3 + else: + img_path = self.path + group = self.root axes = "tczyx" transformations = [] for dataset_transfs in TRANSFORMATIONS: @@ -1208,8 +1525,8 @@ def test_two_label_images(self, array_constructor): # create the root level image data shape = (1, 2, 1, 256, 256) scaler = Scaler() - fmt = FormatV04() self.create_image_data( + group, shape, scaler, axes=axes, @@ -1225,17 +1542,21 @@ def test_two_label_images(self, array_constructor): write_multiscale_labels( labels_mip, - self.root, + group, name=label_name, + fmt=fmt, axes=axes, coordinate_transformations=transformations, ) - self.verify_label_data(label_name, label_data, fmt, shape, transformations) + self.verify_label_data( + img_path, label_name, label_data, fmt, shape, transformations 
+ ) # Verify label metadata - label_root = zarr.open(f"{self.path}/labels", "r") - assert "labels" in label_root.attrs - assert len(label_root.attrs["labels"]) == len(label_names) - assert all( - label_name in label_root.attrs["labels"] for label_name in label_names - ) + label_root = zarr.open(f"{img_path}/labels", mode="r") + attrs = label_root.attrs + if fmt.version == "0.5": + attrs = attrs["ome"] + assert "labels" in attrs + assert len(attrs["labels"]) == len(label_names) + assert all(label_name in attrs["labels"] for label_name in label_names)
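
One recurring pattern in this patch: with OME-Zarr v0.4 (zarr v2) the OME metadata lives directly in ``.zattrs``, while with v0.5 (zarr v3) it sits under the ``ome`` key of the ``zarr.json`` attributes. The ``get_metadata``/``add_metadata`` helpers imported by ``test_io.py`` and ``test_reader.py`` read and write through that difference. A minimal sketch using the current-format default, assuming only behaviour exercised by the tests above (``example.zarr`` is an illustrative path, not one used by the suite)::

    import numpy as np
    import zarr

    from ome_zarr.io import parse_url
    from ome_zarr.writer import add_metadata, get_metadata, write_image

    # no fmt argument: current format (OME-Zarr v0.5), which creates a zarr v3 store
    store = parse_url("example.zarr", mode="w").store
    root = zarr.group(store=store)
    write_image(np.zeros((16, 64, 64), dtype=np.uint8), root, axes="zyx")

    # with zarr v3, the raw attributes nest under the "ome" key...
    assert "multiscales" in root.attrs["ome"]

    # ...but the helpers read and write without caring where the attributes live
    add_metadata(root, {"extra": "value"})
    attrs = get_metadata(root)
    assert "multiscales" in attrs
    assert attrs["extra"] == "value"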