diff --git a/.isort.cfg b/.isort.cfg
index 52ac204c..0f1f7e33 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,5 +1,5 @@
[settings]
-known_third_party = dask,fsspec,numcodecs,numpy,pytest,scipy,skimage,zarr
+known_third_party = dask,numcodecs,numpy,pytest,scipy,skimage,zarr
multi_line_output = 3
include_trailing_comma = True
force_grid_wrap = 0
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 7f120630..ecef8025 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -9,7 +9,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
- python: "3.10"
+ python: "3.12"
# Build documentation in the docs/ directory with Sphinx
sphinx:
diff --git a/README.rst b/README.rst
index 9a9fd538..19c04947 100644
--- a/README.rst
+++ b/README.rst
@@ -6,7 +6,7 @@ ome-zarr-py
Tools for multi-resolution images stored in Zarr filesets, according to the `OME NGFF spec`_.
-See `Readthedocs <https://ome-zarr.readthedocs.io/>`_ for usage information.
+See `Documentation <https://ome-zarr.readthedocs.io/>`_ for usage information.
Documentation
-------------
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 43edcccf..15541c48 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,7 +1,7 @@
-sphinx==7.1.2
+sphinx==8.1.3
sphinx-rtd-theme==3.0.2
fsspec
-zarr
+zarr>=v3.0.0
dask
numpy
scipy
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 66f7a665..608c5993 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -19,11 +19,11 @@ Use the `ome_zarr` command to interrogate Zarr datasets.
Remote data::
- ome_zarr info https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/
+ ome_zarr info https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr/
Local data::
- ome_zarr info 6001240.zarr/
+ ome_zarr info 6001240_labels.zarr/
view
====
@@ -47,11 +47,11 @@ download
-To download all the resolutions and metadata for an image use ``ome_zarr download``. This creates ``6001240.zarr`` locally::
+To download all the resolutions and metadata for an image use ``ome_zarr download``. This creates ``6001240_labels.zarr`` locally::
- ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/
+ ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr
Specify a different output directory::
- ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr/ --output image_dir
+ ome_zarr download https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr --output image_dir
create
======
@@ -59,13 +59,15 @@ create
To create a sample OME-Zarr image from the `skimage <https://scikit-image.org/>`_
data.
-Create an OME-Zarr image in coinsdata/ dir::
+Create an OME-Zarr image in a coinsdata.zarr/ dir using the 'coins' method, in the latest OME-Zarr version or in v0.4::
- ome_zarr create coinsdata
+ ome_zarr create coinsdata.zarr
+
+ ome_zarr create coinsdata.zarr --format 0.4
Create an rgb image from skimage astronaut in testimage dir::
- ome_zarr create testimage --method=astronaut
+ ome_zarr create testimage.zarr --method=astronaut
csv to labels
=============
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a27693c4..8dacb7c1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -4,6 +4,8 @@ ome-zarr-py
Tools for reading and writing multi-resolution images stored in Zarr filesets, according to the `OME NGFF spec`_.
+NB: The default version of OME-Zarr written by ``ome-zarr-py`` is ``v0.5``, which uses ``zarr v3``. OME-Zarr v0.5
+is not yet supported by all OME-Zarr tools. See the documentation for more information on how to write other versions.
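+
+For example, to write OME-Zarr v0.4 (which uses ``zarr v2``), a format object can be
+passed to ``parse_url()`` (a minimal sketch; the path here is illustrative)::
+
+    from ome_zarr.format import FormatV04
+    from ome_zarr.io import parse_url
+
+    store = parse_url("image.zarr", mode="w", fmt=FormatV04()).store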
Features
--------
diff --git a/docs/source/python.rst b/docs/source/python.rst
index 9a4c7840..c98e64f6 100644
--- a/docs/source/python.rst
+++ b/docs/source/python.rst
@@ -13,13 +13,18 @@ of 2 in the X and Y dimensions.
Alternatively, the :py:func:`ome_zarr.writer.write_multiscale` can be used, which takes a
"pyramid" of pre-computed `numpy` arrays.
+The default version of OME-NGFF is v0.5, which is based on Zarr v3. A Zarr v3 store is created
+by `parse_url()` below. To write OME-NGFF v0.4 (Zarr v2), use the `fmt=FormatV04()` argument
+to `parse_url()`, which will create a Zarr v2 store.
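+
+For example, a minimal sketch of creating a Zarr v2 store for writing v0.4 data::
+
+    store = parse_url(path, mode="w", fmt=FormatV04()).store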
+
The following code creates a 3D Image in OME-Zarr::
import numpy as np
import zarr
from ome_zarr.io import parse_url
- from ome_zarr.writer import write_image
+ from ome_zarr.format import FormatV04
+ from ome_zarr.writer import write_image, add_metadata
path = "test_ngff_image.zarr"
@@ -28,10 +33,11 @@ The following code creates a 3D Image in OME-Zarr::
rng = np.random.default_rng(0)
data = rng.poisson(lam=10, size=(size_z, size_xy, size_xy)).astype(np.uint8)
- # write the image data
+    # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2)
store = parse_url(path, mode="w").store
root = zarr.group(store=store)
- write_image(image=data, group=root, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy)))
+ write_image(image=data, group=root, axes="zyx",
+ storage_options=dict(chunks=(1, size_xy, size_xy)))
This image can be viewed in `napari` using the
@@ -41,18 +47,18 @@ This image can be viewed in `napari` using the
Rendering settings
------------------
-Render settings can be added to an existing zarr group::
+Rendering settings can be added to an existing zarr group::
store = parse_url(path, mode="w").store
root = zarr.group(store=store)
- root.attrs["omero"] = {
+ add_metadata(root, {"omero": {
"channels": [{
"color": "00FFFF",
"window": {"start": 0, "end": 20, "min": 0, "max": 255},
"label": "random",
"active": True,
}]
- }
+ }})
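+
+The settings can be read back with the ``get_metadata`` helper, which unwraps the
+"ome" namespace used for Zarr v3 metadata::
+
+    from ome_zarr.writer import get_metadata
+
+    omero = get_metadata(root).get("omero")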
Writing labels
--------------
@@ -64,10 +70,11 @@ The following code creates a 3D Image in OME-Zarr with labels::
import os
from skimage.data import binary_blobs
+ from ome_zarr.format import FormatV04
from ome_zarr.io import parse_url
- from ome_zarr.writer import write_image
+ from ome_zarr.writer import write_image, add_metadata
- path = "test_ngff_image.zarr"
+ path = "test_ngff_image_labels.zarr"
os.mkdir(path)
mean_val=10
@@ -76,19 +83,20 @@ The following code creates a 3D Image in OME-Zarr with labels::
rng = np.random.default_rng(0)
data = rng.poisson(mean_val, size=(size_z, size_xy, size_xy)).astype(np.uint8)
- # write the image data
+    # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2)
store = parse_url(path, mode="w").store
root = zarr.group(store=store)
- write_image(image=data, group=root, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy)))
+ write_image(image=data, group=root, axes="zyx",
+ storage_options=dict(chunks=(1, size_xy, size_xy)))
# optional rendering settings
- root.attrs["omero"] = {
+ add_metadata(root, {"omero": {
"channels": [{
"color": "00FFFF",
"window": {"start": 0, "end": 20, "min": 0, "max": 255},
"label": "random",
"active": True,
}]
- }
+ }})
# add labels...
@@ -104,18 +112,19 @@ The following code creates a 3D Image in OME-Zarr with labels::
labels_grp = root.create_group("labels")
# the 'labels' .zattrs lists the named labels data
label_name = "blobs"
- labels_grp.attrs["labels"] = [label_name]
+ add_metadata(labels_grp, {"labels": [label_name]})
label_grp = labels_grp.create_group(label_name)
- # need 'image-label' attr to be recognized as label
- label_grp.attrs["image-label"] = {
+ write_image(label, label_grp, axes="zyx")
+
+ # we need 'image-label' attr to be recognized as label
+ add_metadata(label_grp, {"image-label": {
"colors": [
{"label-value": 1, "rgba": [255, 0, 0, 255]},
{"label-value": 2, "rgba": [0, 255, 0, 255]},
{"label-value": 3, "rgba": [255, 255, 0, 255]}
]
- }
+ }})
- write_image(label, label_grp, axes="zyx")
Writing HCS datasets to OME-NGFF
--------------------------------
@@ -125,6 +134,7 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu
import numpy as np
import zarr
+ from ome_zarr.format import FormatV04
from ome_zarr.io import parse_url
from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata
@@ -144,6 +154,7 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu
data = rng.poisson(mean_val, size=(num_wells, num_fields, size_z, size_xy, size_xy)).astype(np.uint8)
# write the plate of images and corresponding metadata
+ # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2)
store = parse_url(path, mode="w").store
root = zarr.group(store=store)
write_plate_metadata(root, row_names, col_names, well_paths)
@@ -154,7 +165,8 @@ This sample code shows how to write a high-content screening dataset (i.e. cultu
write_well_metadata(well_group, field_paths)
for fi, field in enumerate(field_paths):
image_group = well_group.require_group(str(field))
- write_image(image=data[wi, fi], group=image_group, axes="zyx", storage_options=dict(chunks=(1, size_xy, size_xy)))
+ write_image(image=data[wi, fi], group=image_group, axes="zyx",
+ storage_options=dict(chunks=(1, size_xy, size_xy)))
This image can be viewed in `napari` using the
@@ -177,11 +189,9 @@ the data is available as `dask` arrays::
from ome_zarr.reader import Reader
import napari
- url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr"
+ url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr"
# read the image data
- store = parse_url(url, mode="r").store
-
reader = Reader(parse_url(url))
# nodes may include images, labels etc
nodes = list(reader())
@@ -207,6 +217,7 @@ Writing big image from tiles::
import os
import zarr
from ome_zarr.io import parse_url
+ from ome_zarr.format import CurrentFormat, FormatV04
from ome_zarr.reader import Reader
from ome_zarr.writer import write_multiscales_metadata
from ome_zarr.dask_utils import resize as da_resize
@@ -214,19 +225,24 @@ Writing big image from tiles::
import dask.array as da
from math import ceil
+ fmt = CurrentFormat()
+ # Use fmt=FormatV04() to write v0.4 format (zarr v2)
+
url = "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.3/9836842.zarr"
reader = Reader(parse_url(url))
nodes = list(reader())
# first level of the pyramid
dask_data = nodes[0].data[0]
tile_size = 512
+ axes = [{"name": "c", "type": "channel"}, {"name": "y", "type": "space"}, {"name": "x", "type": "space"}]
def downsample_pyramid_on_disk(parent, paths):
"""
Takes a high-resolution Zarr array at paths[0] in the zarr group
and down-samples it by a factor of 2 for each of the other paths
"""
- group_path = parent.store.path
+ group_path = str(parent.store_path)
+ img_path = parent.store_path / parent.path
image_path = os.path.join(group_path, parent.path)
print("downsample_pyramid_on_disk", image_path)
for count, path in enumerate(paths[1:]):
@@ -246,10 +262,16 @@ Writing big image from tiles::
dask_image, tuple(dims), preserve_range=True, anti_aliasing=False
)
+ options = {}
+ if fmt.zarr_format == 2:
+ options["dimension_separator"] = "/"
+ else:
+ options["chunk_key_encoding"] = fmt.chunk_key_encoding
+ options["dimension_names"] = [axis["name"] for axis in axes]
# write to disk
da.to_zarr(
- arr=output, url=image_path, component=path,
- dimension_separator=parent._store._dimension_separator,
+ arr=output, url=img_path, component=path,
+ zarr_format=fmt.zarr_format, **options
)
return paths
@@ -270,16 +292,18 @@ Writing big image from tiles::
row_count = ceil(shape[-2]/tile_size)
col_count = ceil(shape[-1]/tile_size)
- store = parse_url("9836842.zarr", mode="w").store
+ store = parse_url("9836842.zarr", mode="w", fmt=fmt).store
root = zarr.group(store=store)
# create empty array at root of pyramid
- zarray = root.require_dataset(
+ zarray = root.require_array(
"0",
shape=shape,
exact=True,
chunks=chunks,
dtype=d_type,
+ chunk_key_encoding=fmt.chunk_key_encoding,
+ dimension_names=[axis["name"] for axis in axes], # omit for v0.4
)
print("row_count", row_count, "col_count", col_count)
@@ -296,7 +320,6 @@ Writing big image from tiles::
zarray[ch_index, y1:y2, x1:x2] = tile
paths = ["0", "1", "2"]
- axes = [{"name": "c", "type": "channel"}, {"name": "y", "type": "space"}, {"name": "x", "type": "space"}]
# We have "0" array. This downsamples (in X and Y dims only) to create "1" and "2"
downsample_pyramid_on_disk(root, paths)
@@ -313,7 +336,8 @@ Writing big image from tiles::
write_multiscales_metadata(root, datasets, axes=axes)
-Using dask to fetch::
+Using dask to fetch. Here we concatenate a lazy "delayed" source of tiles into a full image.
+When that dask array is passed to write_image(), the tiles are loaded on the fly::
# Created for https://forum.image.sc/t/writing-tile-wise-ome-zarr-with-pyramid-size/85063
@@ -323,9 +347,11 @@ Using dask to fetch::
from dask import delayed
from ome_zarr.io import parse_url
- from ome_zarr.writer import write_image, write_multiscales_metadata
+ from ome_zarr.format import FormatV04
+ from ome_zarr.writer import write_image, add_metadata
zarr_name = "test_dask.zarr"
+ # Use fmt=FormatV04() in parse_url() to write v0.4 format (zarr v2)
store = parse_url(zarr_name, mode="w").store
root = zarr.group(store=store)
@@ -374,7 +400,7 @@ Using dask to fetch::
# This will create a downsampled 'multiscales' pyramid
write_image(dask_data, root, axes="czyx")
- root.attrs["omero"] = {
+ add_metadata(root, {"omero": {
"channels": [
{
"color": "FF0000",
@@ -389,7 +415,7 @@ Using dask to fetch::
"active": True,
},
]
- }
+ }})
print("Created image. Open with...")
print(f"ome_zarr view {zarr_name}")
diff --git a/ome_zarr/cli.py b/ome_zarr/cli.py
index 3d88083f..c95ae9e6 100644
--- a/ome_zarr/cli.py
+++ b/ome_zarr/cli.py
@@ -6,6 +6,7 @@
from .csv import csv_to_zarr
from .data import astronaut, coins, create_zarr
+from .format import CurrentFormat, Format, format_from_version
from .scale import Scaler
from .utils import download as zarr_download
from .utils import finder as bff_finder
@@ -63,7 +64,11 @@ def create(args: argparse.Namespace) -> None:
label_name = "circles"
else:
raise Exception(f"unknown method: {args.method}")
- create_zarr(args.path, method=method, label_name=label_name)
+ fmt: Format = CurrentFormat()
+ if args.format:
+ fmt = format_from_version(args.format)
+
+ create_zarr(args.path, method=method, label_name=label_name, fmt=fmt)
def scale(args: argparse.Namespace) -> None:
@@ -147,6 +152,9 @@ def main(args: list[str] | None = None) -> None:
"--method", choices=("coins", "astronaut"), default="coins"
)
parser_create.add_argument("path")
+ parser_create.add_argument(
+ "--format", help="OME-Zarr version to create. e.g. '0.4'"
+ )
parser_create.set_defaults(func=create)
parser_scale = subparsers.add_parser("scale")
diff --git a/ome_zarr/data.py b/ome_zarr/data.py
index 33f766e2..0e696bc7 100644
--- a/ome_zarr/data.py
+++ b/ome_zarr/data.py
@@ -15,7 +15,7 @@
from .format import CurrentFormat, Format
from .io import parse_url
from .scale import Scaler
-from .writer import write_multiscale
+from .writer import add_metadata, write_multiscale
CHANNEL_DIMENSION = 1
@@ -127,7 +127,7 @@ def create_zarr(
"""Generate a synthetic image pyramid with labels."""
pyramid, labels = method()
- loc = parse_url(zarr_directory, mode="w")
+ loc = parse_url(zarr_directory, mode="w", fmt=fmt)
assert loc
grp = zarr.group(loc.store)
axes = None
@@ -162,6 +162,7 @@ def create_zarr(
{
"window": {"start": 0, "end": 255, "min": 0, "max": 255},
"color": "FF0000",
+ "active": True,
}
],
"rdefs": {"model": "greyscale"},
@@ -196,17 +197,18 @@ def create_zarr(
axes=axes,
storage_options=storage_options,
metadata={"omero": image_data},
+ fmt=fmt,
)
if labels:
labels_grp = grp.create_group("labels")
- labels_grp.attrs["labels"] = [label_name]
+ add_metadata(labels_grp, {"labels": [label_name]})
label_grp = labels_grp.create_group(label_name)
if axes is not None:
# remove channel axis for masks
axes = axes.replace("c", "")
- write_multiscale(labels, label_grp, axes=axes)
+ write_multiscale(labels, label_grp, axes=axes, fmt=fmt)
colors = []
properties = []
@@ -214,11 +216,16 @@ def create_zarr(
rgba = [randrange(0, 256) for i in range(4)]
colors.append({"label-value": x, "rgba": rgba})
properties.append({"label-value": x, "class": f"class {x}"})
- label_grp.attrs["image-label"] = {
- "version": fmt.version,
- "colors": colors,
- "properties": properties,
- "source": {"image": "../../"},
- }
+ add_metadata(
+ label_grp,
+ {
+ "image-label": {
+ "version": fmt.version,
+ "colors": colors,
+ "properties": properties,
+ "source": {"image": "../../"},
+ }
+ },
+ )
return grp
diff --git a/ome_zarr/format.py b/ome_zarr/format.py
index e0fba096..40170ac1 100644
--- a/ome_zarr/format.py
+++ b/ome_zarr/format.py
@@ -3,9 +3,9 @@
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterator
-from typing import Any
+from typing import Any, Dict
-from zarr.storage import FSStore
+from zarr.storage import FsspecStore, LocalStore
LOGGER = logging.getLogger("ome_zarr.format")
@@ -25,6 +25,7 @@ def format_implementations() -> Iterator["Format"]:
"""
Return an instance of each format implementation, newest to oldest.
"""
+ yield FormatV05()
yield FormatV04()
yield FormatV03()
yield FormatV02()
@@ -55,12 +56,22 @@ class Format(ABC):
def version(self) -> str: # pragma: no cover
raise NotImplementedError()
+ @property
+ @abstractmethod
+ def zarr_format(self) -> int: # pragma: no cover
+ raise NotImplementedError()
+
+ @property
+ @abstractmethod
+ def chunk_key_encoding(self) -> Dict[str, str]: # pragma: no cover
+ raise NotImplementedError()
+
@abstractmethod
def matches(self, metadata: dict) -> bool: # pragma: no cover
raise NotImplementedError()
@abstractmethod
- def init_store(self, path: str, mode: str = "r") -> FSStore:
+ def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
raise NotImplementedError()
# @abstractmethod
@@ -129,14 +140,35 @@ class FormatV01(Format):
def version(self) -> str:
return "0.1"
+ @property
+ def zarr_format(self) -> int:
+ return 2
+
+ @property
+ def chunk_key_encoding(self) -> Dict[str, str]:
+ return {"name": "v2", "separator": "."}
+
def matches(self, metadata: dict) -> bool:
version = self._get_metadata_version(metadata)
LOGGER.debug("%s matches %s?", self.version, version)
return version == self.version
- def init_store(self, path: str, mode: str = "r") -> FSStore:
- store = FSStore(path, mode=mode, dimension_separator=".")
- LOGGER.debug("Created legacy flat FSStore(%s, %s)", path, mode)
+ def init_store(self, path: str, mode: str = "r") -> FsspecStore | LocalStore:
+ """
+        Not ideal: stores should remain hidden.
+        "dimension_separator" is now specified at array creation time.
+ """
+
+ if path.startswith(("http", "s3")):
+ store = FsspecStore.from_url(
+ path,
+ storage_options=None,
+ read_only=(mode in ("r", "r+", "a")),
+ )
+ else:
+ # No other kwargs supported
+ store = LocalStore(path, read_only=(mode in ("r", "r+", "a")))
+ LOGGER.debug("Created nested FsspecStore(%s, %s)", path, mode)
return store
def generate_well_dict(
@@ -180,31 +212,9 @@ class FormatV02(FormatV01):
def version(self) -> str:
return "0.2"
- def init_store(self, path: str, mode: str = "r") -> FSStore:
- """
- Not ideal. Stores should remain hidden
- TODO: could also check dimension_separator
- """
-
- kwargs = {
- "dimension_separator": "/",
- "normalize_keys": False,
- }
-
- mkdir = True
- if "r" in mode or path.startswith(("http", "s3")):
- # Could be simplified on the fsspec side
- mkdir = False
- if mkdir:
- kwargs["auto_mkdir"] = True
-
- store = FSStore(
- path,
- mode=mode,
- **kwargs,
- ) # TODO: open issue for using Path
- LOGGER.debug("Created nested FSStore(%s, %s, %s)", path, mode, kwargs)
- return store
+ @property
+ def chunk_key_encoding(self) -> Dict[str, str]:
+ return {"name": "v2", "separator": "/"}
class FormatV03(FormatV02): # inherits from V02 to avoid code duplication
@@ -343,4 +353,23 @@ def validate_coordinate_transformations(
)
-CurrentFormat = FormatV04
+class FormatV05(FormatV04):
+ """
+    Changelog: added FormatV05 (May 2025): OME-Zarr v0.5, using Zarr v3
+ """
+
+ @property
+ def version(self) -> str:
+ return "0.5"
+
+ @property
+ def zarr_format(self) -> int:
+ return 3
+
+ @property
+ def chunk_key_encoding(self) -> Dict[str, str]:
+ # this is default for Zarr v3. Could return None?
+ return {"name": "default", "separator": "/"}
+
+
+CurrentFormat = FormatV05
diff --git a/ome_zarr/io.py b/ome_zarr/io.py
index 3a4d2fd2..9af1b7e2 100644
--- a/ome_zarr/io.py
+++ b/ome_zarr/io.py
@@ -3,13 +3,13 @@
Primary entry point is the :func:`~ome_zarr.io.parse_url` method.
"""
-import json
import logging
from pathlib import Path
from urllib.parse import urljoin
import dask.array as da
-from zarr.storage import FSStore
+import zarr
+from zarr.storage import FsspecStore, LocalStore, StoreLike
from .format import CurrentFormat, Format, detect_format
from .types import JSONDict
@@ -19,7 +19,7 @@
class ZarrLocation:
"""
- IO primitive for reading and writing Zarr data. Uses FSStore for all
+ IO primitive for reading and writing Zarr data. Uses a store for all
data access.
No assumptions about the existence of the given path string are made.
@@ -28,7 +28,7 @@ class ZarrLocation:
def __init__(
self,
- path: Path | str | FSStore,
+ path: StoreLike,
mode: str = "r",
fmt: Format = CurrentFormat(),
) -> None:
@@ -39,18 +39,21 @@ def __init__(
self.__path = str(path.resolve())
elif isinstance(path, str):
self.__path = path
- elif isinstance(path, FSStore):
+ elif isinstance(path, FsspecStore):
self.__path = path.path
+ elif isinstance(path, LocalStore):
+ self.__path = str(path.root)
else:
raise TypeError(f"not expecting: {type(path)}")
loader = fmt
if loader is None:
loader = CurrentFormat()
- self.__store: FSStore = (
- path if isinstance(path, FSStore) else loader.init_store(self.__path, mode)
+        self.__store: FsspecStore | LocalStore = (
+ path
+ if isinstance(path, (FsspecStore, LocalStore))
+ else loader.init_store(self.__path, mode)
)
-
self.__init_metadata()
detected = detect_format(self.__metadata, loader)
LOGGER.debug("ZarrLocation.__init__ %s detected: %s", path, detected)
@@ -66,16 +69,35 @@ def __init_metadata(self) -> None:
"""
Load the Zarr metadata files for the given location.
"""
- self.zarray: JSONDict = self.get_json(".zarray")
- self.zgroup: JSONDict = self.get_json(".zgroup")
+ self.zgroup: JSONDict = {}
+ self.zarray: JSONDict = {}
self.__metadata: JSONDict = {}
self.__exists: bool = True
- if self.zgroup:
- self.__metadata = self.get_json(".zattrs")
- elif self.zarray:
- self.__metadata = self.get_json(".zattrs")
- else:
- self.__exists = False
+ # If we want to *create* a new zarr v2 group, we need to specify
+ # zarr_format. This is not needed for reading.
+ zarr_format = None
+ try:
+ # this group is used to get zgroup metadata
+ # used for info, download, Spec.match() via root_attrs() etc.
+ # and to check if the group exists for reading. Only need "r" mode for this.
+ group = zarr.open_group(
+ store=self.__store, path="/", mode="r", zarr_format=zarr_format
+ )
+ self.zgroup = group.attrs.asdict()
+ # For zarr v3, everything is under the "ome" namespace
+ if "ome" in self.zgroup:
+ self.zgroup = self.zgroup["ome"]
+ self.__metadata = self.zgroup
+ except (ValueError, FileNotFoundError):
+ # group doesn't exist. If we are in "w" mode, we need to create it.
+ if self.__mode == "w":
+ # If we are creating a new group, we need to specify the zarr_format.
+ zarr_format = self.__fmt.zarr_format
+ group = zarr.open_group(
+ store=self.__store, path="/", mode="w", zarr_format=zarr_format
+ )
+ else:
+ self.__exists = False
def __repr__(self) -> str:
"""Print the path as well as whether this is a group or an array."""
@@ -98,12 +120,17 @@ def fmt(self) -> Format:
def mode(self) -> str:
return self.__mode
+ @property
+ def version(self) -> str:
+ """Return the version of the OME-NGFF spec used for this location."""
+ return self.__fmt.version
+
@property
def path(self) -> str:
return self.__path
@property
- def store(self) -> FSStore:
+    def store(self) -> FsspecStore | LocalStore:
"""Return the initialized store for this location"""
assert self.__store is not None
return self.__store
@@ -144,26 +171,6 @@ def create(self, path: str) -> "ZarrLocation":
LOGGER.debug("open(%s(%s))", self.__class__.__name__, subpath)
return self.__class__(subpath, mode=self.__mode, fmt=self.__fmt)
- def get_json(self, subpath: str) -> JSONDict:
- """
- Load and return a given subpath of store as JSON.
-
- HTTP 403 and 404 responses are treated as if the file does not exist.
- Exceptions during the remote connection are logged at the WARN level.
- All other exceptions log at the ERROR level.
- """
- try:
- data = self.__store.get(subpath)
- if not data:
- return {}
- return json.loads(data)
- except KeyError:
- LOGGER.debug("JSON not found: %s", subpath)
- return {}
- except Exception:
- LOGGER.exception("Error while loading JSON")
- return {}
-
def parts(self) -> list[str]:
if self._isfile():
return list(Path(self.__path).parts)
@@ -192,10 +199,7 @@ def _isfile(self) -> bool:
Return whether the current underlying implementation
points to a local file or not.
"""
- return self.__store.fs.protocol == "file" or self.__store.fs.protocol == (
- "file",
- "local",
- )
+ return isinstance(self.__store, LocalStore)
def _ishttp(self) -> bool:
"""
@@ -220,13 +224,8 @@ def parse_url(
>>> parse_url('does-not-exist')
"""
- try:
- loc = ZarrLocation(path, mode=mode, fmt=fmt)
- if "r" in mode and not loc.exists():
- return None
- else:
- return loc
- except Exception:
- LOGGER.exception("exception on parsing (stacktrace at DEBUG)")
- LOGGER.debug("stacktrace:", exc_info=True)
+ loc = ZarrLocation(path, mode=mode, fmt=fmt)
+ if "r" in mode and not loc.exists():
return None
+ else:
+ return loc
diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py
index 628b0549..4430eb08 100644
--- a/ome_zarr/reader.py
+++ b/ome_zarr/reader.py
@@ -53,9 +53,7 @@ def __init__(
self.specs.append(Multiscales(self))
if OMERO.matches(zarr):
self.specs.append(OMERO(self))
- if plate_labels:
- self.specs.append(PlateLabels(self))
- elif Plate.matches(zarr):
+ if Plate.matches(zarr):
self.specs.append(Plate(self))
# self.add(zarr, plate_labels=True)
if Well.matches(zarr):
@@ -299,7 +297,7 @@ def __init__(self, node: Node) -> None:
LOGGER.info("datasets %s", datasets)
for resolution in self.datasets:
- data: da.core.Array = self.array(resolution, version)
+ data: da.core.Array = self.array(resolution)
chunk_sizes = [
str(c[0]) + (f" (+ {c[-1]})" if c[-1] != c[0] else "")
for c in data.chunks
@@ -320,7 +318,7 @@ def __init__(self, node: Node) -> None:
if child_zarr.exists():
node.add(child_zarr, visibility=False)
- def array(self, resolution: str, version: str) -> da.core.Array:
+ def array(self, resolution: str) -> da.core.Array:
# data.shape is (t, c, z, y, x) by convention
return self.zarr.load(resolution)
@@ -562,51 +560,6 @@ def get_tile(row: int, col: int) -> da.core.Array:
return da.concatenate(lazy_rows, axis=len(self.axes) - 2)
-class PlateLabels(Plate):
- def get_tile_path(self, level: int, row: int, col: int) -> str: # pragma: no cover
- """251.zarr/A/1/0/labels/0/3/"""
- path = (
- f"{self.row_names[row]}/{self.col_names[col]}/"
- f"{self.first_field_path}/labels/0/{level}"
- )
- return path
-
- def get_pyramid_lazy(self, node: Node) -> None: # pragma: no cover
- super().get_pyramid_lazy(node)
- # pyramid data may be multi-channel, but we only have 1 labels channel
- # TODO: when PlateLabels are re-enabled, update the logic to handle
- # 0.4 axes (list of dictionaries)
- if "c" in self.axes:
- c_index = self.axes.index("c")
- idx = [slice(None)] * len(self.axes)
- idx[c_index] = slice(0, 1)
- node.data[0] = node.data[0][tuple(idx)]
- # remove image metadata
- node.metadata = {}
-
- # combine 'properties' from each image
- # from https://github.com/ome/ome-zarr-py/pull/61/
- properties: dict[int, dict[str, Any]] = {}
- for row in self.row_names:
- for col in self.col_names:
- path = f"{row}/{col}/{self.first_field_path}/labels/0/.zattrs"
- labels_json = self.zarr.get_json(path).get("image-label", {})
- # NB: assume that 'label_val' is unique across all images
- props_list = labels_json.get("properties", [])
- if props_list:
- for props in props_list:
- label_val = props["label-value"]
- properties[label_val] = dict(props)
- del properties[label_val]["label-value"]
- node.metadata["properties"] = properties
-
- def get_numpy_type(self, image_node: Node) -> np.dtype: # pragma: no cover
- # FIXME - don't assume Well A1 is valid
- path = self.get_tile_path(0, 0, 0)
- label_zarr = self.zarr.load(path)
- return label_zarr.dtype
-
-
class Reader:
"""Parses the given Zarr instance into a collection of Nodes properly ordered
depending on context.
diff --git a/ome_zarr/scale.py b/ome_zarr/scale.py
index 0f39750b..0fb3e85e 100644
--- a/ome_zarr/scale.py
+++ b/ome_zarr/scale.py
@@ -138,7 +138,7 @@ def __assert_values(self, pyramid: list[np.ndarray]) -> None:
def __create_group(
self, store: MutableMapping, base: np.ndarray, pyramid: list[np.ndarray]
- ) -> zarr.hierarchy.Group:
+ ) -> zarr.Group:
"""Create group and datasets."""
grp = zarr.group(store)
grp.create_dataset("base", data=base)
diff --git a/ome_zarr/utils.py b/ome_zarr/utils.py
index 395e713d..3d58459d 100644
--- a/ome_zarr/utils.py
+++ b/ome_zarr/utils.py
@@ -21,9 +21,10 @@
import zarr
from dask.diagnostics import ProgressBar
+from .format import format_from_version
from .io import parse_url
from .reader import Multiscales, Node, Reader
-from .types import JSONDict
+from .types import Any, JSONDict
LOGGER = logging.getLogger("ome_zarr.utils")
@@ -54,6 +55,11 @@ def info(path: str, stats: bool = False) -> Iterator[Node]:
continue
print(node)
+ loc = node.zarr
+ version = loc.zgroup.get("version")
+ if version is None:
+ version = loc.zgroup.get("multiscales", [{}])[0].get("version", "")
+ print(" - version:", version)
print(" - metadata")
for spec in node.specs:
print(f" - {spec.__class__.__name__}")
@@ -72,7 +78,9 @@ def view(input_path: str, port: int = 8000, dry_run: bool = False) -> None:
# dry_run is for testing, so we don't open the browser or start the server
zarrs = []
- if (Path(input_path) / ".zattrs").exists():
+ if (Path(input_path) / ".zattrs").exists() or (
+ Path(input_path) / "zarr.json"
+ ).exists():
zarrs = find_multiscales(Path(input_path))
if len(zarrs) == 0:
print(
@@ -120,9 +128,18 @@ def find_multiscales(path_to_zattrs):
# We want full path to find the multiscales Image. e.g. full/path/to/image.zarr/0
# AND we want image Name, e.g. "image.zarr Series 0"
# AND we want the dir path to use for Tags e.g. full/path/to
- with open(path_to_zattrs / ".zattrs") as f:
- text = f.read()
+ text = None
+ for name in (".zattrs", "zarr.json"):
+ if (Path(path_to_zattrs) / name).exists():
+ with open(path_to_zattrs / name) as f:
+ text = f.read()
+ break
+ if text is None:
+ print("No .zattrs or zarr.json found in {path_to_zattrs}")
+ return []
zattrs = json.loads(text)
+ if "attributes" in zattrs and "ome" in zattrs["attributes"]:
+ zattrs = zattrs["attributes"]["ome"]
if "plate" in zattrs:
plate = zattrs.get("plate")
wells = plate.get("wells")
@@ -208,11 +225,11 @@ def finder(input_path: str, port: int = 8000, dry_run=False) -> None:
# walk the input path to find all .zattrs files...
def walk(path: Path):
- if (path / ".zattrs").exists():
+ if (path / ".zattrs").exists() or (path / "zarr.json").exists():
yield from find_multiscales(path)
else:
for p in path.iterdir():
- if (p / ".zattrs").exists():
+ if (p / ".zattrs").exists() or (p / "zarr.json").exists():
yield from find_multiscales(p)
elif p.is_dir():
yield from walk(p)
@@ -322,26 +339,55 @@ def download(input_path: str, output_dir: str = ".") -> None:
target_path = output_path / Path(*path)
target_path.mkdir(parents=True)
- with (target_path / ".zgroup").open("w") as f:
+    # write using the same OME-Zarr format version as the source data
+    version = node.zarr.version
+    fmt = format_from_version(version)
+
+ group_file = "zarr.json"
+ attrs_file = "zarr.json"
+ if fmt.zarr_format == 2:
+ group_file = ".zgroup"
+ attrs_file = ".zattrs"
+
+ with (target_path / group_file).open("w") as f:
f.write(json.dumps(node.zarr.zgroup))
- with (target_path / ".zattrs").open("w") as f:
+ with (target_path / attrs_file).open("w") as f:
metadata: JSONDict = {}
node.write_metadata(metadata)
+ if fmt.zarr_format == 3:
+ # For zarr v3, we need to put metadata under "ome" namespace
+ metadata = {
+ "attributes": {"ome": metadata},
+ "zarr_format": 3,
+ "node_type": "group",
+ }
f.write(json.dumps(metadata))
resolutions: list[da.core.Array] = []
datasets: list[str] = []
+
for spec in node.specs:
if isinstance(spec, Multiscales):
datasets = spec.datasets
resolutions = node.data
+ options: dict[str, Any] = {}
+ if fmt.zarr_format == 2:
+ options["dimension_separator"] = "/"
+ else:
+ options["chunk_key_encoding"] = fmt.chunk_key_encoding
+ options["dimension_names"] = [
+ axis["name"] for axis in node.metadata["axes"]
+ ]
if datasets and resolutions:
pbar = ProgressBar()
for dataset, data in reversed(list(zip(datasets, resolutions))):
LOGGER.info("resolution %s...", dataset)
with pbar:
data.to_zarr(
- str(target_path / dataset), dimension_separator="/"
+ str(target_path / dataset),
+ zarr_format=fmt.zarr_format,
+ **options,
)
else:
# Assume a group that needs metadata, like labels
diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py
index e6e539bc..df836ac4 100644
--- a/ome_zarr/writer.py
+++ b/ome_zarr/writer.py
@@ -10,9 +10,10 @@
import numpy as np
import zarr
from dask.graph_manipulation import bind
+from numcodecs import Blosc
from .axes import Axes
-from .format import CurrentFormat, Format
+from .format import CurrentFormat, Format, FormatV04
from .scale import Scaler
from .types import JSONDict
@@ -171,11 +172,37 @@ def _validate_plate_wells(
return validated_wells
+def _blosc_compressor() -> Blosc:
+ """Return a Blosc compressor with zstd compression"""
+ return Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE)
+
+
+def check_format(
+ group: zarr.Group,
+ fmt: Format | None = None,
+) -> Format:
+ """Check if the format is valid for the given group"""
+
+    zarr_format = group.metadata.zarr_format
+    if fmt is not None:
+        if fmt.zarr_format != zarr_format:
+            raise ValueError(
+                f"Group is zarr_format {zarr_format} but OME-Zarr {fmt.version} requires zarr_format {fmt.zarr_format}"
+ )
+ else:
+ if zarr_format == 2:
+ fmt = FormatV04()
+ elif zarr_format == 3:
+ fmt = CurrentFormat()
+ assert fmt is not None
+ return fmt
+
+
def write_multiscale(
pyramid: ListOfArrayLike,
group: zarr.Group,
chunks: tuple[Any, ...] | int | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
coordinate_transformations: list[list[dict[str, Any]]] | None = None,
storage_options: JSONDict | list[JSONDict] | None = None,
@@ -190,7 +217,7 @@ def write_multiscale(
:param pyramid:
The image data to save. Largest level first. All image arrays MUST be up to
5-dimensional with dimensions ordered (t, c, z, y, x)
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to store the data in
:type chunks: int or tuple of ints, optional
:param chunks:
@@ -226,6 +253,7 @@ def write_multiscale(
:class:`dask.delayed.Delayed` representing the value to be computed by
dask.
"""
+ fmt = check_format(group, fmt)
dims = len(pyramid[0].shape)
axes = _get_valid_axes(dims, axes, fmt)
dask_delayed = []
@@ -242,30 +270,62 @@ def write_multiscale(
# (which might have been changed for versions 0.1 or 0.2)
# if chunks are explicitly set in the storage options
chunks_opt = options.pop("chunks", chunks)
- # switch to this code in 0.5
- # chunks_opt = options.pop("chunks", None)
if chunks_opt is not None:
chunks_opt = _retuple(chunks_opt, data.shape)
+ options["chunk_key_encoding"] = fmt.chunk_key_encoding
+ zarr_format = fmt.zarr_format
+ compressor = options.pop("compressor", None)
+ if zarr_format == 2:
+ # by default we use Blosc with zstd compression
+ # Don't need this for zarr v3 as it has a default compressor
+ if compressor is None:
+ compressor = _blosc_compressor()
+ options["compressor"] = compressor
+ else:
+ if compressor is not None:
+ options["compressors"] = [compressor]
+ if axes is not None:
+ # the array zarr.json also contains axes names
+ # TODO: check if this is written by da.to_zarr
+ options["dimension_names"] = [
+ axis["name"] for axis in axes if isinstance(axis, dict)
+ ]
+
if isinstance(data, da.Array):
+ if zarr_format == 2:
+ options["dimension_separator"] = "/"
+ del options["chunk_key_encoding"]
+ # handle any 'chunks' option from storage_options
if chunks_opt is not None:
data = da.array(data).rechunk(chunks=chunks_opt)
- options["chunks"] = chunks_opt
da_delayed = da.to_zarr(
arr=data,
url=group.store,
component=str(Path(group.path, str(path))),
- storage_options=options,
- compressor=options.get("compressor", zarr.storage.default_compressor),
- dimension_separator=group._store._dimension_separator,
+            # if we pass storage_options, dask requires url to be a string
+ storage_options=None,
compute=compute,
+ zarr_format=zarr_format,
+ **options,
)
if not compute:
dask_delayed.append(da_delayed)
else:
- group.create_dataset(str(path), data=data, chunks=chunks_opt, **options)
+ if chunks_opt is not None:
+ options["chunks"] = chunks_opt
+ options["shape"] = data.shape
+ # otherwise we get 'null'
+ options["fill_value"] = 0
+
+ arr = group.create_array(
+ str(path),
+ dtype=data.dtype,
+ **options,
+ )
+ arr[slice(None)] = data
datasets.append({"path": str(path)})
@@ -297,7 +357,7 @@ def write_multiscale(
def write_multiscales_metadata(
group: zarr.Group,
datasets: list[dict],
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
name: str | None = None,
**metadata: str | JSONDict | list[JSONDict],
@@ -305,7 +365,7 @@ def write_multiscales_metadata(
"""
Write the multiscales metadata in the group.
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type datasets: list of dicts
:param datasets:
@@ -322,6 +382,7 @@ def write_multiscales_metadata(
Ignored for versions 0.1 and 0.2. Required for version 0.3 or greater.
"""
+ fmt = check_format(group, fmt)
ndim = -1
if axes is not None:
if fmt.version in ("0.1", "0.2"):
@@ -337,7 +398,7 @@ def write_multiscales_metadata(
and isinstance(metadata["metadata"], dict)
and "omero" in metadata["metadata"]
):
- omero_metadata = metadata["metadata"].get("omero")
+ omero_metadata = metadata["metadata"].pop("omero")
if omero_metadata is None:
raise KeyError("If `'omero'` is present, value cannot be `None`.")
for c in omero_metadata["channels"]:
@@ -353,23 +414,26 @@ def write_multiscales_metadata(
if not isinstance(c["window"][p], (int, float)):
raise TypeError(f"`'{p}'` must be an int or float.")
- group.attrs["omero"] = omero_metadata
+ add_metadata(group, {"omero": omero_metadata})
# note: we construct the multiscale metadata via dict(), rather than {}
# to avoid duplication of protected keys like 'version' in **metadata
# (for {} this would silently over-write it, with dict() it explicitly fails)
multiscales = [
- dict(
- version=fmt.version,
- datasets=_validate_datasets(datasets, ndim, fmt),
- name=name or group.name,
- **metadata,
- )
+ dict(datasets=_validate_datasets(datasets, ndim, fmt), name=name or group.name)
]
+ if len(metadata.get("metadata", {})) > 0:
+ multiscales[0]["metadata"] = metadata["metadata"]
if axes is not None:
multiscales[0]["axes"] = axes
- group.attrs["multiscales"] = multiscales
+ if fmt.version in ("0.1", "0.2", "0.3", "0.4"):
+ multiscales[0]["version"] = fmt.version
+ else:
+ # Zarr v3 top-level version
+ add_metadata(group, {"version": fmt.version})
+
+ add_metadata(group, {"multiscales": multiscales})
def write_plate_metadata(
@@ -377,7 +441,7 @@ def write_plate_metadata(
rows: list[str],
columns: list[str],
wells: list[str | dict],
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
acquisitions: list[dict] | None = None,
field_count: int | None = None,
name: str | None = None,
@@ -385,7 +449,7 @@ def write_plate_metadata(
"""
Write the plate metadata in the group.
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type rows: list of str
:param rows: The list of names for the plate rows.
@@ -405,11 +469,11 @@ def write_plate_metadata(
:param field_count: The maximum number of fields per view across wells.
"""
+ fmt = check_format(group, fmt)
plate: dict[str, str | int | list[dict]] = {
"columns": _validate_plate_rows_columns(columns),
"rows": _validate_plate_rows_columns(rows),
"wells": _validate_plate_wells(wells, rows, columns, fmt=fmt),
- "version": fmt.version,
}
if name is not None:
plate["name"] = name
@@ -417,18 +481,24 @@ def write_plate_metadata(
plate["field_count"] = field_count
if acquisitions is not None:
plate["acquisitions"] = _validate_plate_acquisitions(acquisitions)
- group.attrs["plate"] = plate
+
+ if fmt.version in ("0.1", "0.2", "0.3", "0.4"):
+ plate["version"] = fmt.version
+ group.attrs["plate"] = plate
+ else:
+ # Zarr v3 metadata under 'ome' with top-level version
+ group.attrs["ome"] = {"version": fmt.version, "plate": plate}
def write_well_metadata(
group: zarr.Group,
images: list[str | dict],
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
) -> None:
"""
Write the well metadata in the group.
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type images: list of dict
:param images: The list of dictionaries for all fields of views.
@@ -438,11 +508,17 @@ def write_well_metadata(
Defaults to the most current.
"""
- well = {
+ fmt = check_format(group, fmt)
+ well: dict[str, Any] = {
"images": _validate_well_images(images),
- "version": fmt.version,
}
- group.attrs["well"] = well
+
+ if fmt.version in ("0.1", "0.2", "0.3", "0.4"):
+ well["version"] = fmt.version
+ group.attrs["well"] = well
+ else:
+ # Zarr v3 metadata under 'ome' with top-level version
+ group.attrs["ome"] = {"version": fmt.version, "well": well}
def write_image(
@@ -450,7 +526,7 @@ def write_image(
group: zarr.Group,
scaler: Scaler = Scaler(),
chunks: tuple[Any, ...] | int | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
coordinate_transformations: list[list[dict[str, Any]]] | None = None,
storage_options: JSONDict | list[JSONDict] | None = None,
@@ -465,7 +541,7 @@ def write_image(
if the scaler argument is non-None.
Image array MUST be up to 5-dimensional with dimensions
ordered (t, c, z, y, x). Image can be a numpy or dask Array.
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type scaler: :class:`ome_zarr.scale.Scaler`
:param scaler:
@@ -504,8 +580,11 @@ def write_image(
:class:`dask.delayed.Delayed` representing the value to be computed by
dask.
"""
+ fmt = check_format(group, fmt)
dask_delayed_jobs = []
+ name = metadata.pop("name", None)
+ name = str(name) if name is not None else None
if isinstance(image, da.Array):
dask_delayed_jobs = _write_dask_image(
image,
@@ -516,7 +595,7 @@ def write_image(
axes=axes,
coordinate_transformations=coordinate_transformations,
storage_options=storage_options,
- name=None,
+ name=name,
compute=compute,
**metadata,
)
@@ -530,7 +609,7 @@ def write_image(
axes=axes,
coordinate_transformations=coordinate_transformations,
storage_options=storage_options,
- name=None,
+ name=name,
compute=compute,
**metadata,
)
@@ -556,7 +635,7 @@ def _write_dask_image(
group: zarr.Group,
scaler: Scaler = Scaler(),
chunks: tuple[Any, ...] | int | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
coordinate_transformations: list[list[dict[str, Any]]] | None = None,
storage_options: JSONDict | list[JSONDict] | None = None,
@@ -564,6 +643,7 @@ def _write_dask_image(
compute: bool | None = True,
**metadata: str | JSONDict | list[JSONDict],
) -> list:
+ fmt = check_format(group, fmt)
if fmt.version in ("0.1", "0.2"):
# v0.1 and v0.2 are strictly 5D
shape_5d: tuple[Any, ...] = (*(1,) * (5 - image.ndim), *image.shape)
@@ -601,23 +681,44 @@ def _write_dask_image(
# chunks_opt = options.pop("chunks", None)
if chunks_opt is not None:
chunks_opt = _retuple(chunks_opt, image.shape)
+ # image.chunks will be used by da.to_zarr
image = da.array(image).rechunk(chunks=chunks_opt)
- options["chunks"] = chunks_opt
LOGGER.debug("chunks_opt: %s", chunks_opt)
shapes.append(image.shape)
LOGGER.debug(
"write dask.array to_zarr shape: %s, dtype: %s", image.shape, image.dtype
)
+ kwargs: dict[str, Any] = {}
+ zarr_format = fmt.zarr_format
+ if zarr_format == 2:
+ kwargs["dimension_separator"] = "/"
+ kwargs["compressor"] = options.pop("compressor", _blosc_compressor())
+ else:
+ kwargs["chunk_key_encoding"] = fmt.chunk_key_encoding
+ if axes is not None:
+ kwargs["dimension_names"] = [
+ a["name"] for a in axes if isinstance(a, dict)
+ ]
+ if "compressor" in options:
+ # We use 'compressors' for group.create_array() but da.to_zarr() below uses
+ # zarr.create() which doesn't support 'compressors'
+ # TypeError: AsyncArray._create() got an unexpected keyword argument 'compressors'
+ # kwargs["compressors"] = [options.pop("compressor", _blosc_compressor())]
+
+ # ValueError: compressor cannot be used for arrays with zarr_format 3.
+ # Use bytes-to-bytes codecs instead.
+ kwargs["compressor"] = options.pop("compressor")
+
delayed.append(
da.to_zarr(
arr=image,
url=group.store,
component=str(Path(group.path, str(path))),
- storage_options=options,
compute=False,
- compressor=options.get("compressor", zarr.storage.default_compressor),
- dimension_separator=group._store._dimension_separator,
+ zarr_format=zarr_format,
+ **kwargs,
)
)
datasets.append({"path": str(path)})
@@ -655,7 +756,7 @@ def write_label_metadata(
name: str,
colors: list[JSONDict] | None = None,
properties: list[JSONDict] | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
**metadata: list[JSONDict] | JSONDict | str,
) -> None:
"""
@@ -664,7 +765,7 @@ def write_label_metadata(
The label data must have been written to a sub-group,
with the same name as the second argument.
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type name: str
:param name: The name of the label sub-group.
@@ -684,6 +785,7 @@ def write_label_metadata(
The format of the ome_zarr data which should be used.
Defaults to the most current.
"""
+ fmt = check_format(group, fmt)
label_group = group[name]
image_label_metadata = {**metadata}
if colors is not None:
@@ -691,11 +793,47 @@ def write_label_metadata(
if properties is not None:
image_label_metadata["properties"] = properties
image_label_metadata["version"] = fmt.version
- label_group.attrs["image-label"] = image_label_metadata
- label_list = group.attrs.get("labels", [])
+ label_list = get_metadata(group).get("labels", [])
label_list.append(name)
- group.attrs["labels"] = label_list
+
+ add_metadata(group, {"labels": label_list}, fmt=fmt)
+ add_metadata(label_group, {"image-label": image_label_metadata}, fmt=fmt)
+
+
+def get_metadata(group: zarr.Group, fmt: Format | None = None) -> dict:
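+    """Return the group metadata, unwrapping the "ome" namespace for OME-Zarr v0.5."""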
+ fmt = check_format(group, fmt)
+ attrs = group.attrs
+ if fmt.version not in ("0.1", "0.2", "0.3", "0.4"):
+ attrs = attrs.get("ome", {})
+ else:
+ attrs = dict(attrs)
+ return attrs
+
+
+def add_metadata(
+ group: zarr.Group, metadata: JSONDict, fmt: Format | None = None
+) -> None:
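+    """Merge metadata into the group attrs, under the "ome" namespace for OME-Zarr v0.5."""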
+
+ fmt = check_format(group, fmt)
+
+ attrs = group.attrs
+ if fmt.version not in ("0.1", "0.2", "0.3", "0.4"):
+ attrs = attrs.get("ome", {})
+
+ for key, value in metadata.items():
+ # merge dicts...
+ if isinstance(value, dict) and isinstance(attrs.get(key), dict):
+ attrs[key].update(value)
+ else:
+ attrs[key] = value
+
+ if fmt.version in ("0.1", "0.2", "0.3", "0.4"):
+ for key, value in attrs.items():
+ group.attrs[key] = value
+ else:
+ # Zarr v3 metadata under 'ome' with top-level version
+ group.attrs["ome"] = attrs
def write_multiscale_labels(
@@ -703,7 +841,7 @@ def write_multiscale_labels(
group: zarr.Group,
name: str,
chunks: tuple[Any, ...] | int | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
coordinate_transformations: list[list[dict[str, Any]]] | None = None,
storage_options: JSONDict | list[JSONDict] | None = None,
@@ -722,7 +860,7 @@ def write_multiscale_labels(
the image label data to save. Largest level first
All image arrays MUST be up to 5-dimensional with dimensions
ordered (t, c, z, y, x)
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type name: str, optional
:param name: The name of this labels data.
@@ -762,6 +900,7 @@ def write_multiscale_labels(
:class:`dask.delayed.Delayed` representing the value to be computed by
dask.
"""
+ fmt = check_format(group, fmt)
sub_group = group.require_group(f"labels/{name}")
dask_delayed_jobs = write_multiscale(
pyramid,
@@ -791,7 +930,7 @@ def write_labels(
name: str,
scaler: Scaler = Scaler(),
chunks: tuple[Any, ...] | int | None = None,
- fmt: Format = CurrentFormat(),
+ fmt: Format | None = None,
axes: AxesType = None,
coordinate_transformations: list[list[dict[str, Any]]] | None = None,
storage_options: JSONDict | list[JSONDict] | None = None,
@@ -811,7 +950,7 @@ def write_labels(
if the scaler argument is non-None.
Label array MUST be up to 5-dimensional with dimensions
ordered (t, c, z, y, x)
- :type group: :class:`zarr.hierarchy.Group`
+ :type group: :class:`zarr.Group`
:param group: The group within the zarr store to write the metadata in.
:type name: str, optional
:param name: The name of this labels data.
@@ -855,6 +994,7 @@ def write_labels(
:class:`dask.delayed.Delayed` representing the value to be computed by
dask.
"""
+ fmt = check_format(group, fmt)
sub_group = group.require_group(f"labels/{name}")
dask_delayed_jobs = []
diff --git a/pyproject.toml b/pyproject.toml
index 72d8f0bf..3a5e40a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ requires-python = ">3.10"
dependencies = [
"numpy",
"dask",
- "zarr>=2.8.1,<3",
+ "zarr>=v3.0.0",
"fsspec[s3]>=0.8,!=2021.07.0,!=2023.9.0",
# See https://github.com/fsspec/filesystem_spec/issues/819
"aiohttp<4",
diff --git a/tests/data/v2/0/.zarray b/tests/data/v2/0/.zarray
index 705b3f46..c01d65ed 100644
--- a/tests/data/v2/0/.zarray
+++ b/tests/data/v2/0/.zarray
@@ -13,6 +13,7 @@
"id": "blosc",
"shuffle": 1
},
+ "dimension_separator": "/",
"dtype": "|u1",
"fill_value": 0,
"filters": null,
diff --git a/tests/test_cli.py b/tests/test_cli.py
index ca7e692b..aacc4e6a 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -6,7 +6,9 @@
import zarr
from ome_zarr.cli import main
-from ome_zarr.utils import finder, strip_common_prefix, view
+from ome_zarr.format import CurrentFormat, FormatV04, FormatV05
+from ome_zarr.io import parse_url
+from ome_zarr.utils import find_multiscales, finder, strip_common_prefix, view
from ome_zarr.writer import write_plate_metadata
@@ -31,44 +33,89 @@ def s3_address(self, request):
"0.1": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.1/6001240.zarr",
"0.2": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.2/6001240.zarr",
"0.3": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.3/9836842.zarr",
+ "0.4": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr",
+ "0.5": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.5/idr0062A/6001240_labels.zarr",
}
return urls[request.param]
- def test_coins_info(self):
+ @pytest.mark.parametrize(
+ "fmt",
+ (
+ pytest.param(FormatV04(), id="V04"),
+ pytest.param(FormatV05(), id="V05"),
+ pytest.param(None, id="CurrentFormat"),
+ ),
+ )
+ def test_coins_info(self, capsys, fmt):
+ """Test create and info with various formats."""
filename = str(self.path) + "-1"
- main(["create", "--method=coins", filename])
+ args = ["create", "--method=coins", filename]
+ if fmt:
+ args += ["--format", fmt.version]
+ main(args)
main(["info", filename])
+ out, err = capsys.readouterr()
+ print("Captured output:", out)
+ assert os.path.join("labels", "coins") in out
+ version = fmt.version if fmt else CurrentFormat().version
+ assert f"- version: {version}" in out
def test_astronaut_info(self):
filename = str(self.path) + "-2"
main(["create", "--method=astronaut", filename])
main(["info", filename])
- def test_astronaut_download(self, tmpdir):
+ @pytest.mark.parametrize(
+ "fmt",
+ (
+ pytest.param(FormatV04(), id="V04"),
+ pytest.param(FormatV05(), id="V05"),
+ pytest.param(None, id="CurrentFormat"),
+ ),
+ )
+ def test_astronaut_download(self, tmpdir, fmt):
out = str(tmpdir / "out")
filename = str(self.path) + "-3"
basename = os.path.split(filename)[-1]
- main(["create", "--method=astronaut", filename])
+ args = ["create", "--method=astronaut", filename]
+ if fmt:
+ args += ["--format", fmt.version]
+ main(args)
main(["download", filename, f"--output={out}"])
main(["info", f"{out}/{basename}"])
- assert directory_items(Path(out) / "data-3") == [
- Path(".zattrs"),
- Path(".zgroup"),
- Path("0"),
- Path("1"),
- Path("2"),
- Path("3"),
- Path("4"),
- Path("labels"),
- ]
-
- assert directory_items(Path(out) / "data-3" / "1") == [
- Path(".zarray"),
- Path("0"),
- Path("1"),
- Path("2"),
- ]
+ if fmt is not None and fmt.zarr_format == 2:
+ assert directory_items(Path(out) / "data-3") == [
+ Path(".zattrs"),
+ Path(".zgroup"),
+ Path("0"),
+ Path("1"),
+ Path("2"),
+ Path("3"),
+ Path("4"),
+ Path("labels"),
+ ]
+ assert directory_items(Path(out) / "data-3" / "1") == [
+ Path(".zarray"),
+ Path(".zattrs"), # empty '{}'
+ Path("0"),
+ Path("1"),
+ Path("2"),
+ ]
+ else:
+ assert directory_items(Path(out) / "data-3") == [
+ Path("0"),
+ Path("1"),
+ Path("2"),
+ Path("3"),
+ Path("4"),
+ Path("labels"),
+ Path("zarr.json"),
+ ]
+ assert directory_items(Path(out) / "data-3" / "1") == [
+ Path("c"),
+ Path("zarr.json"),
+ ]
def test_s3_info(self, s3_address):
main(["info", s3_address])
@@ -113,12 +160,37 @@ def test_view(self):
# we need dry_run to be True to avoid blocking the test with server
view(filename, 8000, True)
- def test_finder(self):
+ @pytest.mark.parametrize(
+ "fmt",
+ (pytest.param(FormatV04(), id="V04"), pytest.param(FormatV05(), id="V05")),
+ )
+ def test_finder(self, fmt):
img_dir = (self.path / "images").mkdir()
+
+ # test with empty directory - for code coverage
+ finder(img_dir, 8000, True)
+ assert not (img_dir / "biofile_finder.csv").exists()
+
img_dir2 = (img_dir / "dir2").mkdir()
bf2raw_dir = (img_dir / "bf2raw.zarr").mkdir()
- main(["create", "--method=astronaut", (str(img_dir / "astronaut"))])
- main(["create", "--method=coins", (str(img_dir2 / "coins"))])
+ main(
+ [
+ "create",
+ "--method=astronaut",
+ (str(img_dir / "astronaut")),
+ "--format",
+ fmt.version,
+ ]
+ )
+ main(
+ [
+ "create",
+ "--method=coins",
+ (str(img_dir2 / "coins")),
+ "--format",
+ fmt.version,
+ ]
+ )
(bf2raw_dir / "OME").mkdir()
# write minimal bioformats2raw and xml metadata
@@ -132,8 +204,8 @@ def test_finder(self):
)
# create a plate
- plate_dir = (img_dir2 / "plate").mkdir()
- store = zarr.DirectoryStore(str(plate_dir))
+ plate_path = Path(img_dir2.mkdir("plate"))
+ store = parse_url(plate_path, mode="w", fmt=fmt).store
root = zarr.group(store=store)
write_plate_metadata(root, ["A"], ["1"], ["A/1"])
@@ -146,3 +218,8 @@ def test_finder(self):
assert "dir2/plate/A/1/0,plate,dir2" in csv_text
assert "coins,dir2" in csv_text
assert "test.fake" in csv_text
+
+ def test_find_multiscales(self):
+ # for code coverage...
+ empty_dir = (self.path / "find_multiscales").mkdir()
+ assert len(find_multiscales(empty_dir)) == 0
diff --git a/tests/test_io.py b/tests/test_io.py
index 94b1900a..83a7a355 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,11 +1,12 @@
from pathlib import Path
-import fsspec
import pytest
import zarr
+from zarr.storage import LocalStore
from ome_zarr.data import create_zarr
from ome_zarr.io import ZarrLocation, parse_url
+from ome_zarr.writer import add_metadata, get_metadata
class TestIO:
@@ -13,14 +14,14 @@ class TestIO:
def initdir(self, tmpdir):
self.path = tmpdir.mkdir("data")
create_zarr(str(self.path))
- self.store = parse_url(str(self.path), mode="w").store
- self.root = zarr.group(store=self.store)
+ self.store = parse_url(str(self.path), mode="r").store
+ self.root = zarr.open_group(store=self.store, mode="r")
def test_parse_url(self):
assert parse_url(str(self.path))
def test_parse_nonexistent_url(self):
- assert parse_url(self.path + "/does-not-exist") is None
+ assert parse_url(str(self.path + "/does-not-exist")) is None
def test_loc_str(self):
assert ZarrLocation(str(self.path))
@@ -32,7 +33,22 @@ def test_loc_store(self):
assert ZarrLocation(self.store)
def test_loc_fs(self):
- fs = fsspec.filesystem("memory")
- fsstore = zarr.storage.FSStore(url="/", fs=fs)
- loc = ZarrLocation(fsstore)
+ store = LocalStore(str(self.path))
+ loc = ZarrLocation(store)
assert loc
+
+ def test_no_overwrite(self):
+ print("self.path:", self.path)
+
+ assert self.root.attrs.get("ome") is not None
+ # Test that we can open a store to write, without
+ # overwriting existing data
+ new_store = parse_url(str(self.path), mode="w").store
+ new_root = zarr.open_group(store=new_store)
+ add_metadata(new_root, {"extra": "test_no_overwrite"})
+ # read...
+ read_store = parse_url(str(self.path)).store
+ read_root = zarr.open_group(store=read_store, mode="r")
+ attrs = get_metadata(read_root)
+ assert attrs.get("extra") == "test_no_overwrite"
+ assert attrs.get("multiscales") is not None
diff --git a/tests/test_node.py b/tests/test_node.py
index a538c7c7..fc613b14 100644
--- a/tests/test_node.py
+++ b/tests/test_node.py
@@ -3,7 +3,7 @@
from numpy import zeros
from ome_zarr.data import create_zarr
-from ome_zarr.format import FormatV01, FormatV02, FormatV03
+from ome_zarr.format import FormatV01, FormatV02, FormatV03, FormatV04
from ome_zarr.io import parse_url
from ome_zarr.reader import Label, Labels, Multiscales, Node, Plate, Well
from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata
@@ -44,16 +44,16 @@ class TestHCSNode:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = tmpdir.mkdir("data")
- self.store = parse_url(str(self.path), mode="w").store
+ self.store = parse_url(str(self.path), mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
def test_minimal_plate(self):
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"])
+ write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], fmt=FormatV01())
row_group = self.root.require_group("A")
well = row_group.require_group("1")
- write_well_metadata(well, ["0"])
+ write_well_metadata(well, ["0"], fmt=FormatV04())
image = well.require_group("0")
- write_image(zeros((1, 1, 1, 256, 256)), image)
+ write_image(zeros((1, 1, 1, 256, 256)), image, fmt=FormatV01())
node = Node(parse_url(str(self.path)), list())
assert node.data
@@ -85,7 +85,7 @@ def test_multiwells_plate(self, fmt):
write_well_metadata(well, ["0", "1", "2"], fmt=fmt)
for field in range(3):
image = well.require_group(str(field))
- write_image(zeros((1, 1, 1, 256, 256)), image)
+ write_image(zeros((1, 1, 1, 256, 256)), image, fmt=fmt)
node = Node(parse_url(str(self.path)), list())
assert node.data
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 86188a0e..3aeda912 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -5,9 +5,16 @@
from numpy import ones, zeros
from ome_zarr.data import create_zarr
+from ome_zarr.format import FormatV04
from ome_zarr.io import parse_url
from ome_zarr.reader import Node, Plate, Reader, Well
-from ome_zarr.writer import write_image, write_plate_metadata, write_well_metadata
+from ome_zarr.writer import (
+ add_metadata,
+ get_metadata,
+ write_image,
+ write_plate_metadata,
+ write_well_metadata,
+)
class TestReader:
@@ -42,6 +49,39 @@ def test_omero(self):
assert isinstance(omero["channels"], list)
assert len(omero["channels"]) == 1
+ def test_read_v05(self):
+ rng = np.random.default_rng(0)
+ data = rng.poisson(lam=10, size=(10, 128, 128)).astype(np.uint8)
+ img_path = str(self.path / "test_read_v05.zarr")
+ root = zarr.group(img_path)
+ arr = root.create_array(
+ name="s0", shape=data.shape, chunks=(10, 10, 10), dtype=data.dtype
+ )
+ arr[:] = data
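+ # OME-Zarr 0.5 nests all NGFF metadata under the "ome" key of the group
+ # attributes (stored in zarr.json), with "version" at the top level of
+ # that dict rather than inside each "multiscales" entry: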
+ root.attrs["ome"] = {
+ "version": "0.5",
+ "multiscales": [
+ {
+ "datasets": [
+ {
+ "path": "s0",
+ "coordinateTransformations": [
+ {
+ "type": "scale",
+ "scale": [1, 1, 1],
+ }
+ ],
+ }
+ ]
+ }
+ ],
+ }
+ reader = Reader(parse_url(img_path))
+ nodes = list(reader())
+ assert len(nodes) == 1
+ image_node = nodes[0]
+ assert np.allclose(data, image_node.data[0])
+
class TestInvalid:
@pytest.fixture(autouse=True)
@@ -51,9 +91,9 @@ def initdir(self, tmpdir):
def test_invalid_version(self):
grp = create_zarr(str(self.path))
# update version to something invalid
- attrs = grp.attrs.asdict()
+ attrs = get_metadata(grp)
attrs["multiscales"][0]["version"] = "invalid"
- grp.attrs.put(attrs)
+ add_metadata(grp, attrs)
# should raise exception
with pytest.raises(ValueError) as exe:
reader = Reader(parse_url(str(self.path)))
@@ -65,7 +105,7 @@ class TestHCSReader:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = tmpdir.mkdir("data")
- self.store = parse_url(str(self.path), mode="w").store
+ self.store = parse_url(str(self.path), mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
def test_minimal_plate(self):
diff --git a/tests/test_scaler.py b/tests/test_scaler.py
index 93ddc726..c3ab1759 100644
--- a/tests/test_scaler.py
+++ b/tests/test_scaler.py
@@ -145,4 +145,4 @@ def test_big_dask_pyramid(self, tmpdir):
print("level_1", level_1)
# to zarr invokes compute
data_dir = tmpdir.mkdir("test_big_dask_pyramid")
- da.to_zarr(level_1, data_dir)
+ da.to_zarr(level_1, str(data_dir))
diff --git a/tests/test_writer.py b/tests/test_writer.py
index a396ca6e..5461025f 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -1,6 +1,6 @@
import filecmp
+import json
import pathlib
-from tempfile import TemporaryDirectory
from typing import Any
import dask.array as da
@@ -9,8 +9,17 @@
import zarr
from dask import persist
from numcodecs import Blosc
-
-from ome_zarr.format import CurrentFormat, FormatV01, FormatV02, FormatV03, FormatV04
+from zarr.abc.codec import BytesBytesCodec
+from zarr.codecs import BloscCodec
+
+from ome_zarr.format import (
+ CurrentFormat,
+ FormatV01,
+ FormatV02,
+ FormatV03,
+ FormatV04,
+ FormatV05,
+)
from ome_zarr.io import ZarrLocation, parse_url
from ome_zarr.reader import Multiscales, Reader
from ome_zarr.scale import Scaler
@@ -39,10 +48,17 @@ class TestWriter:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = pathlib.Path(tmpdir.mkdir("data"))
- self.store = parse_url(self.path, mode="w").store
+ # create zarr v2 group...
+ self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
self.group = self.root.create_group("test")
+ # let's create zarr v3 group too...
+ self.path_v3 = self.path / "v3"
+ store_v3 = parse_url(self.path_v3, mode="w").store
+ root_v3 = zarr.group(store=store_v3)
+ self.group_v3 = root_v3.create_group("test")
+
def create_data(self, shape, dtype=np.uint8, mean_val=10):
rng = np.random.default_rng(0)
return rng.poisson(mean_val, size=shape).astype(dtype)
@@ -72,6 +88,7 @@ def scaler(self, request):
pytest.param(FormatV02, id="V02"),
pytest.param(FormatV03, id="V03"),
pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
),
)
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
@@ -79,9 +96,17 @@ def scaler(self, request):
def test_writer(
self, shape, scaler, format_version, array_constructor, storage_options_list
):
+ version = format_version()
+
+ if version.version == "0.5":
+ group = self.group_v3
+ grp_path = self.path_v3 / "test"
+ else:
+ group = self.group
+ grp_path = self.path / "test"
+
data = self.create_data(shape)
data = array_constructor(data)
- version = format_version()
axes = "tczyx"[-len(shape) :]
transformations = []
for dataset_transfs in TRANSFORMATIONS:
@@ -98,7 +123,7 @@ def test_writer(
storage_options = [{"chunks": chunk} for chunk in chunks]
write_image(
image=data,
- group=self.group,
+ group=group,
scaler=scaler,
fmt=version,
axes=axes,
@@ -107,7 +132,7 @@ def test_writer(
)
# Verify
- reader = Reader(parse_url(f"{self.path}/test"))
+ reader = Reader(parse_url(f"{grp_path}"))
node = next(iter(reader()))
assert Multiscales.matches(node.zarr)
if version.version in ("0.1", "0.2"):
@@ -129,13 +154,47 @@ def test_writer(
assert tuple(first_chunk) == _retuple(expected, nd_array.shape)
assert np.allclose(data, node.data[0][...].compute())
+ def test_mix_zarr_formats(self):
+ # check group zarr v2 and v3 matches fmt
+ data = self.create_data((64, 64, 64))
+ with pytest.raises(ValueError, match=r"Group is zarr_format: 2"):
+ write_image(data, self.group, axes="zyx", fmt=CurrentFormat())
+
+ with pytest.raises(ValueError, match=r"Group is zarr_format: 3"):
+ write_multiscale([data], self.group_v3, fmt=FormatV04())
+
+ with pytest.raises(ValueError, match=r"Group is zarr_format: 3"):
+ write_plate_metadata(self.group_v3, ["A"], ["1"], ["A/1"], fmt=FormatV04())
+
+ with pytest.raises(ValueError, match=r"Group is zarr_format: 2"):
+ write_well_metadata(self.group, [{"path": "0"}], fmt=CurrentFormat())
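+ # i.e. the writer functions validate the group's zarr_format against the
+ # requested OME-Zarr version (v0.4 -> zarr v2, v0.5 -> zarr v3) and
+ # raise ValueError on any mismatch.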
+
+ @pytest.mark.parametrize("zarr_format", [2, 3])
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
- def test_write_image_current(self, array_constructor):
+ def test_write_image_current(self, array_constructor, zarr_format):
shape = (64, 64, 64)
data = self.create_data(shape)
data = array_constructor(data)
- write_image(data, self.group, axes="zyx")
- reader = Reader(parse_url(f"{self.path}/test"))
+
+ if zarr_format == 2:
+ group = self.group
+ grp_path = self.path / "test"
+ else:
+ group = self.group_v3
+ grp_path = self.path_v3 / "test"
+
+ write_image(data, group, axes="zyx")
+ reader = Reader(parse_url(f"{grp_path}"))
+
+ # manually check this is zarr v2 or v3
+ if zarr_format == 2:
+ json_text = (grp_path / ".zattrs").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text)
+ else:
+ json_text = (grp_path / "zarr.json").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {})
+ assert "multiscales" in attrs_json
+
image_node = next(iter(reader()))
for transfs in image_node.metadata["coordinateTransformations"]:
assert len(transfs) == 1
@@ -148,34 +207,61 @@ def test_write_image_current(self, array_constructor):
@pytest.mark.parametrize("read_from_zarr", [True, False])
@pytest.mark.parametrize("compute", [True, False])
- def test_write_image_dask(self, read_from_zarr, compute):
+ @pytest.mark.parametrize("zarr_format", [2, 3])
+ def test_write_image_dask(self, read_from_zarr, compute, zarr_format):
+ if zarr_format == 2:
+ grp_path = self.path / "test"
+ fmt = FormatV04()
+ zarr_attrs = ".zattrs"
+ zarr_array = ".zarray"
+ group = self.group
+ else:
+ grp_path = self.path_v3 / "test"
+ fmt = CurrentFormat()
+ zarr_attrs = "zarr.json"
+ zarr_array = "zarr.json"
+ group = self.group_v3
+
# Size 100 tests resize shapes: https://github.com/ome/ome-zarr-py/issues/219
shape = (128, 200, 200)
data = self.create_data(shape)
data_delayed = da.from_array(data)
chunks = (32, 32)
- opts = {"chunks": chunks, "compressor": None}
+ # same NAME needed for exact zarr_attrs match below
+ # (otherwise group.name is used)
+ NAME = "test_write_image_dask"
+ opts = {"chunks": chunks}
if read_from_zarr:
# write to zarr and re-read as dask...
- path = f"{self.path}/temp/"
- store = parse_url(path, mode="w").store
- temp_group = zarr.group(store=store).create_group("test")
- write_image(data, temp_group, axes="zyx", storage_options=opts)
- loc = ZarrLocation(f"{self.path}/temp/test")
+ path = f"{grp_path}/temp/"
+ store = parse_url(path, mode="w", fmt=fmt).store
+ # store and group will be zarr v2 or v3 depending on fmt
+ temp_group = zarr.group(store=store).create_group("to_dask")
+ assert temp_group.info._zarr_format == zarr_format
+ write_image(
+ data_delayed,
+ temp_group,
+ axes="zyx",
+ storage_options=opts,
+ name=NAME,
+ )
+ print("PATH", f"{grp_path}/temp/to_dask")
+ loc = ZarrLocation(f"{grp_path}/temp/to_dask")
+
reader = Reader(loc)()
nodes = list(reader)
- data_delayed = (
- nodes[0]
- .load(Multiscales)
- .array(resolution="0", version=CurrentFormat().version)
- )
+ data_delayed = nodes[0].load(Multiscales).array(resolution="0")
+ # check that the data is the same
+ assert np.allclose(data, data_delayed[...].compute())
+ assert group.info._zarr_format == zarr_format
dask_delayed_jobs = write_image(
data_delayed,
- self.group,
+ group,
axes="zyx",
- storage_options={"chunks": chunks, "compressor": None},
+ storage_options={"chunks": chunks},
compute=compute,
+ name=NAME,
)
assert not compute == len(dask_delayed_jobs)
@@ -185,7 +271,8 @@ def test_write_image_dask(self, read_from_zarr, compute):
# before persisting the jobs
dask_delayed_jobs = persist(*dask_delayed_jobs)
- reader = Reader(parse_url(f"{self.path}/test"))
+ # check the data written to zarr v2 or v3 group
+ reader = Reader(parse_url(f"{grp_path}"))
image_node = next(iter(reader()))
first_chunk = [c[0] for c in image_node.data[0].chunks]
assert tuple(first_chunk) == _retuple(chunks, image_node.data[0].shape)
@@ -203,16 +290,16 @@ def test_write_image_dask(self, read_from_zarr, compute):
# if shape smaller than chunk, dask writer uses chunk == shape
# so we only compare larger resolutions
assert filecmp.cmp(
- f"{self.path}/temp/test/{level}/.zarray",
- f"{self.path}/test/{level}/.zarray",
+ f"{grp_path}/temp/to_dask/{level}/{zarr_array}",
+ f"{grp_path}/{level}/{zarr_array}",
shallow=False,
)
if read_from_zarr:
- # .zattrs should be the same
+ # exact match, including NAME
assert filecmp.cmp(
- f"{self.path}/temp/test/.zattrs",
- f"{self.path}/test/.zattrs",
+ f"{grp_path}/temp/to_dask/{zarr_attrs}",
+ f"{grp_path}/{zarr_attrs}",
shallow=False,
)
@@ -226,29 +313,80 @@ def test_write_image_scalar_chunks(self):
write_image(
image=data, group=self.group, axes="xyz", storage_options={"chunks": 32}
)
- for data in self.group.values():
+ for data in self.group.array_values():
print(data)
assert data.chunks == (32, 32, 32)
+ @pytest.mark.parametrize(
+ "format_version",
+ (
+ pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
+ ),
+ )
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
- def test_write_image_compressed(self, array_constructor):
+ def test_write_image_compressed(self, array_constructor, format_version):
shape = (64, 64, 64)
data = self.create_data(shape)
data = array_constructor(data)
- compressor = Blosc(cname="zstd", clevel=5, shuffle=Blosc.SHUFFLE)
+ path = self.path / "test_write_image_compressed"
+ store = parse_url(path, mode="w", fmt=format_version()).store
+ root = zarr.group(store=store)
+ CNAME = "lz4"
+ LEVEL = 4
+ if format_version().zarr_format == 3:
+ compressor = BloscCodec(cname=CNAME, clevel=LEVEL, shuffle="shuffle")
+ assert isinstance(compressor, BytesBytesCodec)
+ if isinstance(data, da.Array):
+ # skip test - can't get this to pass. Fails with:
+ # ValueError: compressor cannot be used for arrays with zarr_format 3.
+ # Use bytes-to-bytes codecs instead.
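+ # (zarr v3 arrays take bytes-to-bytes codecs such as BloscCodec, but
+ # da.to_zarr forwards the zarr v2 style "compressor" option as-is,
+ # which zarr rejects for zarr_format 3.)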
+ pytest.skip("storage_options['compressor'] fails in da.to_zarr()")
+ else:
+ compressor = Blosc(cname=CNAME, clevel=LEVEL, shuffle=Blosc.SHUFFLE)
+
write_image(
- data, self.group, axes="zyx", storage_options={"compressor": compressor}
+ data,
+ root,
+ axes="zyx",
+ storage_options={"compressor": compressor},
)
- group = zarr.open(f"{self.path}/test")
- assert group["0"].compressor.get_config() == {
- "id": "blosc",
- "cname": "zstd",
- "clevel": 5,
- "shuffle": Blosc.SHUFFLE,
- "blocksize": 0,
- }
-
- def test_default_compression(self):
+ group = zarr.open(f"{path}")
+ for ds in ["0", "1"]:
+ assert len(group[ds].info._compressors) > 0
+ comp = group[ds].info._compressors[0]
+ if format_version().zarr_format == 3:
+ print("comp", comp.to_dict())
+ # {'configuration': {'checksum': False, 'level': 0}, 'name': 'zstd'}
+ assert comp.to_dict() == {
+ "name": "blosc",
+ "configuration": {
+ "typesize": 1,
+ "cname": CNAME,
+ "clevel": LEVEL,
+ "shuffle": "shuffle",
+ "blocksize": 0,
+ },
+ }
+ else:
+ print("comp", comp.get_config())
+ assert comp.get_config() == {
+ "id": "blosc",
+ "cname": CNAME,
+ "clevel": LEVEL,
+ "shuffle": Blosc.SHUFFLE,
+ "blocksize": 0,
+ }
+
+ @pytest.mark.parametrize(
+ "format_version",
+ (
+ pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
+ ),
+ )
+ @pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
+ def test_default_compression(self, array_constructor, format_version):
"""Test that the default compression is not None.
We make an array of zeros which should compress trivially easily,
@@ -259,19 +397,56 @@ def test_default_compression(self):
# avoid empty chunks so they are guaranteed to be written out to disk
arr_np[0, 0, 0, 0] = 1
# 4MB chunks, trivially compressible
- arr = da.from_array(arr_np, chunks=(1, 50, 200, 400))
- with TemporaryDirectory(suffix=".ome.zarr") as tempdir:
- path = tempdir
- store = parse_url(path, mode="w").store
- root = zarr.group(store=store)
- # no compressor options, we are checking default
- write_multiscale([arr], group=root, axes="tzyx")
- # check chunk: multiscale level 0, 4D chunk at (0, 0, 0, 0)
- chunk_size = (pathlib.Path(path) / "0/0/0/0/0").stat().st_size
- assert chunk_size < 4e6
-
- def test_validate_coordinate_transforms(self):
- fmt = FormatV04()
+ arr = array_constructor(arr_np)
+ path = self.path / "test_default_compression"
+ store = parse_url(path, mode="w", fmt=format_version()).store
+ root = zarr.group(store=store)
+ assert root.info._zarr_format == format_version().zarr_format
+ # no compressor options, we are checking default
+ write_image(
+ arr, group=root, axes="tzyx", storage_options=dict(chunks=(1, 100, 100))
+ )
+
+ # check chunk: multiscale level 0, 4D chunk at (0, 0, 0, 0)
+ c = ""
+ for ds in ["0", "1"]:
+ if format_version().zarr_format == 3:
+ assert (path / "zarr.json").exists()
+ assert (path / ds / "zarr.json").exists()
+ c = "c/"
+ json_text = (path / ds / "zarr.json").read_text(encoding="utf-8")
+ arr_json = json.loads(json_text)
+ assert arr_json["codecs"][0]["name"] == "bytes"
+ assert arr_json["codecs"][1] == {
+ "name": "zstd",
+ "configuration": {"level": 0, "checksum": False},
+ }
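+ # (zstd at level 0 without checksum is zarr-python's default v3
+ # compressor, paired with the "bytes" array-to-bytes codec.)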
+ else:
+ assert (path / ".zattrs").exists()
+ json_text = (path / ds / ".zarray").read_text(encoding="utf-8")
+ arr_json = json.loads(json_text)
+ assert arr_json["compressor"] == {
+ "blocksize": 0,
+ "clevel": 5,
+ "cname": "zstd",
+ "id": "blosc",
+ "shuffle": 1,
+ }
+
+ chunk_size = (path / f"0/{c}0/0/0/0").stat().st_size
+ assert chunk_size < 4e6
+
+ @pytest.mark.parametrize(
+ "format_version",
+ (
+ pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
+ ),
+ )
+ def test_validate_coordinate_transforms(self, format_version):
+ fmt = format_version()
transformations = [
[{"type": "scale", "scale": (1, 1)}],
@@ -429,17 +604,39 @@ class TestMultiscalesMetadata:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = pathlib.Path(tmpdir.mkdir("data"))
- self.store = parse_url(self.path, mode="w").store
+ # create zarr v2 group...
+ self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
- def test_multi_levels_transformations(self):
+ # let's create zarr v3 group too...
+ self.path_v3 = self.path / "v3"
+ store_v3 = parse_url(self.path_v3, mode="w").store
+ self.root_v3 = zarr.group(store=store_v3)
+
+ @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05()))
+ def test_multi_levels_transformations(self, fmt):
datasets = []
for level, transf in enumerate(TRANSFORMATIONS):
datasets.append({"path": str(level), "coordinateTransformations": transf})
- write_multiscales_metadata(self.root, datasets, axes="tczyx")
- assert "multiscales" in self.root.attrs
- assert "version" in self.root.attrs["multiscales"][0]
- assert self.root.attrs["multiscales"][0]["datasets"] == datasets
+ if fmt.version == "0.5":
+ group = self.root_v3
+ else:
+ group = self.root
+ write_multiscales_metadata(group, datasets, axes="tczyx")
+ # we want to be sure this is zarr v2 / v3
+ attrs = group.attrs
+ if fmt.version == "0.5":
+ attrs = attrs.get("ome")
+ assert "version" in attrs
+ json_text = (self.path_v3 / "zarr.json").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {})
+ else:
+ json_text = (self.path / ".zattrs").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text)
+ assert "version" in attrs["multiscales"][0]
+ assert "multiscales" in attrs_json
+ assert "multiscales" in attrs
+ assert attrs["multiscales"][0]["datasets"] == datasets
@pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03()))
def test_version(self, fmt):
@@ -470,7 +667,7 @@ def test_axes_V03(self, axes):
assert self.root.attrs["multiscales"][0]["axes"] == axes
with pytest.raises(ValueError):
# for v0.4 and above, paths no-longer supported (need dataset dicts)
- write_multiscales_metadata(self.root, ["0"], axes=axes)
+ write_multiscales_metadata(self.root, ["0"], axes=axes, fmt=FormatV04())
@pytest.mark.parametrize("fmt", (FormatV01(), FormatV02()))
def test_axes_ignored(self, fmt):
@@ -498,7 +695,7 @@ def test_invalid_0_3_axes(self, axes):
def test_invalid_datasets(self, datasets):
with pytest.raises(ValueError):
write_multiscales_metadata(
- self.root, datasets, axes=["t", "c", "z", "y", "x"]
+ self.root, datasets, axes=["t", "c", "z", "y", "x"], fmt=FormatV04()
)
@pytest.mark.parametrize(
@@ -519,7 +716,7 @@ def test_valid_transformations(self, coordinateTransformations):
"coordinateTransformations": coordinateTransformations,
}
]
- write_multiscales_metadata(self.root, datasets, axes=axes)
+ write_multiscales_metadata(self.root, datasets, axes=axes, fmt=FormatV04())
assert "multiscales" in self.root.attrs
assert self.root.attrs["multiscales"][0]["axes"] == axes
assert self.root.attrs["multiscales"][0]["datasets"] == datasets
@@ -570,7 +767,7 @@ def test_invalid_transformations(self, coordinateTransformations):
{"path": "0", "coordinateTransformations": coordinateTransformations}
]
with pytest.raises(ValueError):
- write_multiscales_metadata(self.root, datasets, axes=axes)
+ write_multiscales_metadata(self.root, datasets, axes=axes, fmt=FormatV04())
@pytest.mark.parametrize(
"metadata",
@@ -603,7 +800,10 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None):
KeyError, match="If `'omero'` is present, value cannot be `None`."
):
write_multiscales_metadata(
- self.root, datasets, axes="tczyx", metadata={"omero": metadata}
+ self.root,
+ datasets,
+ axes="tczyx",
+ metadata={"omero": metadata},
)
else:
window_metadata = (
@@ -624,6 +824,7 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None):
datasets,
axes="tczyx",
metadata={"omero": metadata},
+ fmt=FormatV04(),
)
elif isinstance(window_metadata, list):
with pytest.raises(TypeError, match=".*`'window'`.*"):
@@ -632,6 +833,7 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None):
datasets,
axes="tczyx",
metadata={"omero": metadata},
+ fmt=FormatV04(),
)
elif color_metadata is not None and len(color_metadata) != 6:
with pytest.raises(TypeError, match=".*`'color'`.*"):
@@ -643,7 +845,10 @@ def test_omero_metadata(self, metadata: dict[str, Any] | None):
)
else:
write_multiscales_metadata(
- self.root, datasets, axes="tczyx", metadata={"omero": metadata}
+ self.root,
+ datasets,
+ axes="tczyx",
+ metadata={"omero": metadata},
)
@@ -651,23 +856,40 @@ class TestPlateMetadata:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = pathlib.Path(tmpdir.mkdir("data"))
- self.store = parse_url(self.path, mode="w").store
+ # create zarr v2 group...
+ self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
+ # create zarr v3 group...
+ self.path_v3 = self.path / "v3"
+ store_v3 = parse_url(self.path_v3, mode="w").store
+ self.root_v3 = zarr.group(store=store_v3)
+
+ @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05()))
+ def test_minimal_plate(self, fmt):
+ if fmt.version == "0.4":
+ group = self.root
+ else:
+ group = self.root_v3
+ write_plate_metadata(group, ["A"], ["1"], ["A/1"])
+ attrs = group.attrs
+ if fmt.version != "0.4":
+ attrs = attrs["ome"]
+ assert attrs["version"] == fmt.version
+ else:
+ attrs["plate"]["version"] == fmt.version
- def test_minimal_plate(self):
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"])
- assert "plate" in self.root.attrs
- assert self.root.attrs["plate"]["columns"] == [{"name": "1"}]
- assert self.root.attrs["plate"]["rows"] == [{"name": "A"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
- assert self.root.attrs["plate"]["wells"] == [
+ assert "plate" in attrs
+ assert attrs["plate"]["columns"] == [{"name": "1"}]
+ assert attrs["plate"]["rows"] == [{"name": "A"}]
+ assert attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0}
]
- assert "name" not in self.root.attrs["plate"]
- assert "field_count" not in self.root.attrs["plate"]
- assert "acquisitions" not in self.root.attrs["plate"]
+ assert "name" not in attrs["plate"]
+ assert "field_count" not in attrs["plate"]
+ assert "acquisitions" not in attrs["plate"]
- def test_12wells_plate(self):
+ @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05()))
+ def test_12wells_plate(self, fmt):
rows = ["A", "B", "C", "D"]
cols = ["1", "2", "3"]
wells = [
@@ -684,21 +906,28 @@ def test_12wells_plate(self):
"D/2",
"D/3",
]
- write_plate_metadata(self.root, rows, cols, wells)
- assert "plate" in self.root.attrs
- assert self.root.attrs["plate"]["columns"] == [
+ if fmt.version == "0.4":
+ group = self.root
+ else:
+ group = self.root_v3
+ write_plate_metadata(group, rows, cols, wells)
+ attrs = group.attrs
+ if fmt.version != "0.4":
+ attrs = attrs["ome"]
+
+ assert "plate" in attrs
+ assert attrs["plate"]["columns"] == [
{"name": "1"},
{"name": "2"},
{"name": "3"},
]
- assert self.root.attrs["plate"]["rows"] == [
+ assert attrs["plate"]["rows"] == [
{"name": "A"},
{"name": "B"},
{"name": "C"},
{"name": "D"},
]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
- assert self.root.attrs["plate"]["wells"] == [
+ assert attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0},
{"path": "A/2", "rowIndex": 0, "columnIndex": 1},
{"path": "A/3", "rowIndex": 0, "columnIndex": 2},
@@ -712,41 +941,48 @@ def test_12wells_plate(self):
{"path": "D/2", "rowIndex": 3, "columnIndex": 1},
{"path": "D/3", "rowIndex": 3, "columnIndex": 2},
]
- assert "name" not in self.root.attrs["plate"]
- assert "field_count" not in self.root.attrs["plate"]
- assert "acquisitions" not in self.root.attrs["plate"]
+ assert "name" not in attrs["plate"]
+ assert "field_count" not in attrs["plate"]
+ assert "acquisitions" not in attrs["plate"]
- def test_sparse_plate(self):
+ @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05()))
+ def test_sparse_plate(self, fmt):
rows = ["A", "B", "C", "D", "E"]
cols = ["1", "2", "3", "4", "5"]
wells = [
"B/2",
"E/5",
]
- write_plate_metadata(self.root, rows, cols, wells)
- assert "plate" in self.root.attrs
- assert self.root.attrs["plate"]["columns"] == [
+ if fmt.version == "0.4":
+ group = self.root
+ else:
+ group = self.root_v3
+ write_plate_metadata(group, rows, cols, wells)
+ attrs = group.attrs
+ if fmt.version != "0.4":
+ attrs = attrs["ome"]
+ assert "plate" in attrs
+ assert attrs["plate"]["columns"] == [
{"name": "1"},
{"name": "2"},
{"name": "3"},
{"name": "4"},
{"name": "5"},
]
- assert self.root.attrs["plate"]["rows"] == [
+ assert attrs["plate"]["rows"] == [
{"name": "A"},
{"name": "B"},
{"name": "C"},
{"name": "D"},
{"name": "E"},
]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
- assert self.root.attrs["plate"]["wells"] == [
+ assert attrs["plate"]["wells"] == [
{"path": "B/2", "rowIndex": 1, "columnIndex": 1},
{"path": "E/5", "rowIndex": 4, "columnIndex": 4},
]
- assert "name" not in self.root.attrs["plate"]
- assert "field_count" not in self.root.attrs["plate"]
- assert "acquisitions" not in self.root.attrs["plate"]
+ assert "name" not in attrs["plate"]
+ assert "field_count" not in attrs["plate"]
+ assert "acquisitions" not in attrs["plate"]
@pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03()))
def test_legacy_wells(self, fmt):
@@ -761,25 +997,30 @@ def test_legacy_wells(self, fmt):
assert "acquisitions" not in self.root.attrs["plate"]
def test_plate_name(self):
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], name="test")
- assert "plate" in self.root.attrs
- assert self.root.attrs["plate"]["columns"] == [{"name": "1"}]
- assert self.root.attrs["plate"]["name"] == "test"
- assert self.root.attrs["plate"]["rows"] == [{"name": "A"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
- assert self.root.attrs["plate"]["wells"] == [
+ # We don't need to test v04 and v05 for all tests since
+ # the metadata is the same
+ write_plate_metadata(self.root_v3, ["A"], ["1"], ["A/1"], name="test")
+ attrs = self.root_v3.attrs["ome"]
+ assert "plate" in attrs
+ assert attrs["plate"]["columns"] == [{"name": "1"}]
+ assert attrs["plate"]["name"] == "test"
+ assert attrs["plate"]["rows"] == [{"name": "A"}]
+ assert attrs["version"] == FormatV05().version
+ assert attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0}
]
- assert "field_count" not in self.root.attrs["plate"]
- assert "acquisitions" not in self.root.attrs["plate"]
+ assert "field_count" not in attrs["plate"]
+ assert "acquisitions" not in attrs["plate"]
def test_field_count(self):
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], field_count=10)
+ write_plate_metadata(
+ self.root, ["A"], ["1"], ["A/1"], field_count=10, fmt=FormatV04()
+ )
assert "plate" in self.root.attrs
assert self.root.attrs["plate"]["columns"] == [{"name": "1"}]
assert self.root.attrs["plate"]["field_count"] == 10
assert self.root.attrs["plate"]["rows"] == [{"name": "A"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
+ assert self.root.attrs["plate"]["version"] == FormatV04().version
assert self.root.attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0}
]
@@ -788,12 +1029,14 @@ def test_field_count(self):
def test_acquisitions_minimal(self):
a = [{"id": 1}, {"id": 2}, {"id": 3}]
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], acquisitions=a)
+ write_plate_metadata(
+ self.root, ["A"], ["1"], ["A/1"], acquisitions=a, fmt=FormatV04()
+ )
assert "plate" in self.root.attrs
assert self.root.attrs["plate"]["acquisitions"] == a
assert self.root.attrs["plate"]["columns"] == [{"name": "1"}]
assert self.root.attrs["plate"]["rows"] == [{"name": "A"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
+ assert self.root.attrs["plate"]["version"] == FormatV04().version
assert self.root.attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0}
]
@@ -811,12 +1054,14 @@ def test_acquisitions_maximal(self):
"endtime": 1343749392000,
}
]
- write_plate_metadata(self.root, ["A"], ["1"], ["A/1"], acquisitions=a)
+ write_plate_metadata(
+ self.root, ["A"], ["1"], ["A/1"], acquisitions=a, fmt=FormatV04()
+ )
assert "plate" in self.root.attrs
assert self.root.attrs["plate"]["acquisitions"] == a
assert self.root.attrs["plate"]["columns"] == [{"name": "1"}]
assert self.root.attrs["plate"]["rows"] == [{"name": "A"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
+ assert self.root.attrs["plate"]["version"] == FormatV04().version
assert self.root.attrs["plate"]["wells"] == [
{"path": "A/1", "rowIndex": 0, "columnIndex": 0}
]
@@ -834,7 +1079,7 @@ def test_acquisitions_maximal(self):
def test_invalid_acquisition_keys(self, acquisitions):
with pytest.raises(ValueError):
write_plate_metadata(
- self.root, ["A"], ["1"], ["A/1"], acquisitions=acquisitions
+ self.root_v3, ["A"], ["1"], ["A/1"], acquisitions=acquisitions
)
def test_unspecified_acquisition_keys(self):
@@ -881,7 +1126,7 @@ def test_invalid_well_list(self, wells):
)
def test_invalid_well_keys(self, wells):
with pytest.raises(ValueError):
- write_plate_metadata(self.root, ["A"], ["1"], wells)
+ write_plate_metadata(self.root, ["A"], ["1"], wells, fmt=FormatV04())
@pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03()))
def test_legacy_unspecified_well_keys(self, fmt):
@@ -913,11 +1158,11 @@ def test_unspecified_well_keys(self):
"unspecified_key": "gamma",
},
]
- write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells)
+ write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04())
assert "plate" in self.root.attrs
assert self.root.attrs["plate"]["columns"] == [{"name": "1"}, {"name": "2"}]
assert self.root.attrs["plate"]["rows"] == [{"name": "A"}, {"name": "B"}]
- assert self.root.attrs["plate"]["version"] == CurrentFormat().version
+ assert self.root.attrs["plate"]["version"] == FormatV04().version
assert self.root.attrs["plate"]["wells"] == wells
def test_missing_well_keys(self):
@@ -927,42 +1172,70 @@ def test_missing_well_keys(self):
{"path": "B/1"},
]
with pytest.raises(ValueError):
- write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells)
+ write_plate_metadata(
+ self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04()
+ )
def test_well_not_in_rows(self):
wells = ["A/1", "B/1", "C/1"]
with pytest.raises(ValueError):
- write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells)
+ write_plate_metadata(
+ self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04()
+ )
def test_well_not_in_columns(self):
wells = ["A/1", "A/2", "A/3"]
with pytest.raises(ValueError):
- write_plate_metadata(self.root, ["A", "B"], ["1", "2"], wells)
+ write_plate_metadata(
+ self.root, ["A", "B"], ["1", "2"], wells, fmt=FormatV04()
+ )
@pytest.mark.parametrize("rows", (["A", "B", "B"], ["A", "&"]))
def test_invalid_rows(self, rows):
with pytest.raises(ValueError):
- write_plate_metadata(self.root, rows, ["1"], ["A/1"])
+ write_plate_metadata(self.root, rows, ["1"], ["A/1"], fmt=FormatV04())
@pytest.mark.parametrize("columns", (["1", "2", "2"], ["1", "&"]))
def test_invalid_columns(self, columns):
with pytest.raises(ValueError):
- write_plate_metadata(self.root, ["A"], columns, ["A/1"])
+ write_plate_metadata(self.root, ["A"], columns, ["A/1"], fmt=FormatV04())
class TestWellMetadata:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
self.path = pathlib.Path(tmpdir.mkdir("data"))
- self.store = parse_url(self.path, mode="w").store
+ # create zarr v2 group...
+ self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
+ # create zarr v3 group too...
+ self.path_v3 = self.path / "v3"
+ store_v3 = parse_url(self.path_v3, mode="w").store
+ self.root_v3 = zarr.group(store=store_v3)
+
+ @pytest.mark.parametrize("fmt", (FormatV04(), FormatV05()))
@pytest.mark.parametrize("images", (["0"], [{"path": "0"}]))
- def test_minimal_well(self, images):
- write_well_metadata(self.root, images)
- assert "well" in self.root.attrs
- assert self.root.attrs["well"]["images"] == [{"path": "0"}]
- assert self.root.attrs["well"]["version"] == CurrentFormat().version
+ def test_minimal_well(self, images, fmt):
+ if fmt.version == "0.5":
+ group = self.root_v3
+ else:
+ group = self.root
+ write_well_metadata(group, images)
+ # we want to be sure this is zarr v2 / v3, so we load json manually too
+ attrs = group.attrs
+ if fmt.version == "0.5":
+ attrs = attrs.get("ome")
+ assert attrs["version"] == fmt.version
+ json_text = (self.path_v3 / "zarr.json").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text).get("attributes", {}).get("ome", {})
+ else:
+ json_text = (self.path / ".zattrs").read_text(encoding="utf-8")
+ attrs_json = json.loads(json_text)
+ assert attrs["well"]["version"] == fmt.version
+
+ assert "well" in attrs_json
+ assert attrs["well"]["images"] == [{"path": "0"}]
@pytest.mark.parametrize(
"images",
@@ -976,14 +1249,14 @@ def test_minimal_well(self, images):
),
)
def test_multiple_images(self, images):
- write_well_metadata(self.root, images)
- assert "well" in self.root.attrs
- assert self.root.attrs["well"]["images"] == [
+ write_well_metadata(self.root_v3, images)
+ assert "well" in self.root_v3.attrs.get("ome", {})
+ assert self.root_v3.attrs["ome"]["well"]["images"] == [
{"path": "0"},
{"path": "1"},
{"path": "2"},
]
- assert self.root.attrs["well"]["version"] == CurrentFormat().version
+ self.root_v3.attrs["ome"]["version"] == FormatV05().version
@pytest.mark.parametrize("fmt", (FormatV01(), FormatV02(), FormatV03()))
def test_version(self, fmt):
@@ -1001,7 +1274,7 @@ def test_multiple_acquisitions(self):
write_well_metadata(self.root, images)
assert "well" in self.root.attrs
assert self.root.attrs["well"]["images"] == images
- assert self.root.attrs["well"]["version"] == CurrentFormat().version
+ assert self.root.attrs["well"]["version"] == FormatV04().version
@pytest.mark.parametrize(
"images",
@@ -1025,22 +1298,27 @@ def test_unspecified_images_keys(self):
write_well_metadata(self.root, images)
assert "well" in self.root.attrs
assert self.root.attrs["well"]["images"] == images
- assert self.root.attrs["well"]["version"] == CurrentFormat().version
+ assert self.root.attrs["well"]["version"] == FormatV04().version
class TestLabelWriter:
@pytest.fixture(autouse=True)
def initdir(self, tmpdir):
- self.path = pathlib.Path(tmpdir.mkdir("data.ome.zarr"))
- self.store = parse_url(self.path, mode="w").store
+ self.path = pathlib.Path(tmpdir.mkdir("data"))
+ # create zarr v2 group...
+ self.store = parse_url(self.path, mode="w", fmt=FormatV04()).store
self.root = zarr.group(store=self.store)
+ # create zarr v3 group...
+ self.path_v3 = self.path / "v3"
+ store_v3 = parse_url(self.path_v3, mode="w").store
+ self.root_v3 = zarr.group(store=store_v3)
- def create_image_data(self, shape, scaler, fmt, axes, transformations):
+ def create_image_data(self, group, shape, scaler, fmt, axes, transformations):
rng = np.random.default_rng(0)
data = rng.poisson(10, size=shape).astype(np.uint8)
write_image(
image=data,
- group=self.root,
+ group=group,
scaler=scaler,
fmt=fmt,
axes=axes,
@@ -1066,9 +1344,11 @@ def scaler(self, request):
else:
return None
- def verify_label_data(self, label_name, label_data, fmt, shape, transformations):
+ def verify_label_data(
+ self, img_path, label_name, label_data, fmt, shape, transformations
+ ):
# Verify image data
- reader = Reader(parse_url(f"{self.path}/labels/{label_name}"))
+ reader = Reader(parse_url(f"{img_path}/labels/{label_name}"))
node = next(iter(reader()))
assert Multiscales.matches(node.zarr)
if fmt.version in ("0.1", "0.2"):
@@ -1086,16 +1366,24 @@ def verify_label_data(self, label_name, label_data, fmt, shape, transformations)
assert np.allclose(label_data, node.data[0][...].compute())
# Verify label metadata
- label_root = zarr.open(f"{self.path}/labels", "r")
- assert "labels" in label_root.attrs
- assert label_name in label_root.attrs["labels"]
-
- label_group = zarr.open(f"{self.path}/labels/{label_name}", "r")
- assert "image-label" in label_group.attrs
- assert label_group.attrs["image-label"]["version"] == fmt.version
+ label_root = zarr.open(f"{img_path}/labels", mode="r")
+ label_attrs = label_root.attrs
+ if fmt.version == "0.5":
+ label_attrs = label_attrs["ome"]
+ assert "labels" in label_attrs
+ assert label_name in label_attrs["labels"]
+
+ label_group = zarr.open(f"{img_path}/labels/{label_name}", mode="r")
+ imglabel_attrs = label_group.attrs
+ if fmt.version == "0.5":
+ imglabel_attrs = imglabel_attrs["ome"]
+ assert imglabel_attrs["version"] == fmt.version
+ else:
+ assert imglabel_attrs["image-label"]["version"] == fmt.version
+ assert "image-label" in imglabel_attrs
# Verify multiscale metadata
- name = label_group.attrs["multiscales"][0].get("name", "")
+ name = imglabel_attrs["multiscales"][0].get("name", "")
assert label_name == name
@pytest.mark.parametrize(
@@ -1105,11 +1393,19 @@ def verify_label_data(self, label_name, label_data, fmt, shape, transformations)
pytest.param(FormatV02, id="V02"),
pytest.param(FormatV03, id="V03"),
pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
),
)
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
def test_write_labels(self, shape, scaler, format_version, array_constructor):
fmt = format_version()
+ if fmt.version == "0.5":
+ img_path = self.path_v3
+ group = self.root_v3
+ else:
+ img_path = self.path
+ group = self.root
+
axes = "tczyx"[-len(shape) :]
transformations = []
for dataset_transfs in TRANSFORMATIONS:
@@ -1132,18 +1428,20 @@ def test_write_labels(self, shape, scaler, format_version, array_constructor):
label_data = array_constructor(label_data)
# create the root level image data
- self.create_image_data(shape, scaler, fmt, axes, transformations)
+ self.create_image_data(group, shape, scaler, fmt, axes, transformations)
write_labels(
label_data,
- self.root,
+ group,
scaler=scaler,
name=label_name,
fmt=fmt,
axes=axes,
coordinate_transformations=transformations,
)
- self.verify_label_data(label_name, label_data, fmt, shape, transformations)
+ self.verify_label_data(
+ img_path, label_name, label_data, fmt, shape, transformations
+ )
@pytest.mark.parametrize(
"format_version",
@@ -1152,6 +1450,7 @@ def test_write_labels(self, shape, scaler, format_version, array_constructor):
pytest.param(FormatV02, id="V02"),
pytest.param(FormatV03, id="V03"),
pytest.param(FormatV04, id="V04"),
+ pytest.param(FormatV05, id="V05"),
),
)
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
@@ -1159,6 +1458,12 @@ def test_write_multiscale_labels(
self, shape, scaler, format_version, array_constructor
):
fmt = format_version()
+ if fmt.version == "0.5":
+ img_path = self.path_v3
+ group = self.root_v3
+ else:
+ img_path = self.path
+ group = self.root
axes = "tczyx"[-len(shape) :]
transformations = []
for dataset_transfs in TRANSFORMATIONS:
@@ -1185,20 +1490,32 @@ def test_write_multiscale_labels(
labels_mip = scaler.nearest(label_data)
# create the root level image data
- self.create_image_data(shape, scaler, fmt, axes, transformations)
+ self.create_image_data(group, shape, scaler, fmt, axes, transformations)
write_multiscale_labels(
labels_mip,
- self.root,
+ group,
name=label_name,
fmt=fmt,
axes=axes,
coordinate_transformations=transformations,
)
- self.verify_label_data(label_name, label_data, fmt, shape, transformations)
+ self.verify_label_data(
+ img_path, label_name, label_data, fmt, shape, transformations
+ )
+ @pytest.mark.parametrize(
+ "fmt",
+ (pytest.param(FormatV04(), id="V04"), pytest.param(FormatV05(), id="V05")),
+ )
@pytest.mark.parametrize("array_constructor", [np.array, da.from_array])
- def test_two_label_images(self, array_constructor):
+ def test_two_label_images(self, array_constructor, fmt):
+ if fmt.version == "0.5":
+ img_path = self.path_v3
+ group = self.root_v3
+ else:
+ img_path = self.path
+ group = self.root
axes = "tczyx"
transformations = []
for dataset_transfs in TRANSFORMATIONS:
@@ -1208,8 +1525,8 @@ def test_two_label_images(self, array_constructor):
# create the root level image data
shape = (1, 2, 1, 256, 256)
scaler = Scaler()
- fmt = FormatV04()
self.create_image_data(
+ group,
shape,
scaler,
axes=axes,
@@ -1225,17 +1542,21 @@ def test_two_label_images(self, array_constructor):
write_multiscale_labels(
labels_mip,
- self.root,
+ group,
name=label_name,
+ fmt=fmt,
axes=axes,
coordinate_transformations=transformations,
)
- self.verify_label_data(label_name, label_data, fmt, shape, transformations)
+ self.verify_label_data(
+ img_path, label_name, label_data, fmt, shape, transformations
+ )
# Verify label metadata
- label_root = zarr.open(f"{self.path}/labels", "r")
- assert "labels" in label_root.attrs
- assert len(label_root.attrs["labels"]) == len(label_names)
- assert all(
- label_name in label_root.attrs["labels"] for label_name in label_names
- )
+ label_root = zarr.open(f"{img_path}/labels", mode="r")
+ attrs = label_root.attrs
+ if fmt.version == "0.5":
+ attrs = attrs["ome"]
+ assert "labels" in attrs
+ assert len(attrs["labels"]) == len(label_names)
+ assert all(label_name in attrs["labels"] for label_name in label_names)