diff --git a/.isort.cfg b/.isort.cfg index 1bd634a4..4ed4aa47 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,2 +1,2 @@ [settings] -known_third_party = dask,numpy,ome_zarr,omero,omero_rois,omero_zarr,pytest,setuptools,skimage,zarr +known_third_party = dask,ngff_zarr,numpy,omero,omero_rois,omero_zarr,pytest,setuptools,skimage,zarr diff --git a/setup.py b/setup.py index 3e2bffb7..d75958e4 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,13 @@ def get_long_description() -> str: author="The Open Microscopy Team", author_email="", python_requires=">=3", - install_requires=["omero-py>=5.6.0", "ome-zarr>=0.5.0"], + install_requires=[ + "omero-py>=5.6.0", + "zarr>=2.18.0,<3", + "scikit-image", + "dask", + "ngff-zarr", + ], long_description=long_description, keywords=["OMERO.CLI", "plugin"], url="https://github.com/ome/omero-cli-zarr/", diff --git a/src/omero_zarr/__init__.py b/src/omero_zarr/__init__.py index f9feaf6c..7fa2f96f 100644 --- a/src/omero_zarr/__init__.py +++ b/src/omero_zarr/__init__.py @@ -16,11 +16,9 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
-from ome_zarr.format import CurrentFormat - from ._version import version as __version__ -ngff_version = CurrentFormat().version +ngff_version = "0.4" __all__ = [ "__version__", diff --git a/src/omero_zarr/masks.py b/src/omero_zarr/masks.py index c8d13a8c..0ef41f94 100644 --- a/src/omero_zarr/masks.py +++ b/src/omero_zarr/masks.py @@ -23,22 +23,20 @@ import time from collections import defaultdict from fileinput import input as finput -from typing import Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple +# from .scale import Scaler +import ngff_zarr as nz import numpy as np import omero.clients # noqa -from ome_zarr.conversions import int_to_rgba_255 -from ome_zarr.io import parse_url -from ome_zarr.reader import Multiscales, Node -from ome_zarr.scale import Scaler -from ome_zarr.types import JSONDict -from ome_zarr.writer import write_multiscale_labels + +# FIXME: from ome_zarr.writer import write_multiscale_labels from omero.model import MaskI, PolygonI from omero.rtypes import unwrap from skimage.draw import polygon as sk_polygon from zarr.hierarchy import open_group -from .util import marshal_axes, marshal_transformations, open_store, print_status +from .util import int_to_rgba_255, open_store, print_status LOGGER = logging.getLogger("omero_zarr.masks") @@ -314,19 +312,27 @@ def save(self, masks: List[omero.model.Shape], name: str) -> None: image_path = source_image if self.output: image_path = os.path.join(self.output, source_image) - src = parse_url(image_path) - assert src, f"Source image does not exist at {image_path}" - input_pyramid = Node(src, []) - assert input_pyramid.load(Multiscales), "No multiscales metadata found" - input_pyramid_levels = len(input_pyramid.data) + assert os.path.exists( + image_path + ), f"Source image does not exist at {image_path}" + # We inspect the image to find out how many levels we have store = open_store(image_path) root = open_group(store) + print("root", root) + root_attrs = 
root.attrs + print("root.attrs", root_attrs) + # we know we're working with v0.4 here... + ds = root_attrs.get("multiscales", [{}])[0].get("datasets") + # assert src, f"Source image does not exist at {image_path}" + # input_pyramid = Node(src, []) + assert ds is not None, "No multiscales metadata found" + input_pyramid_levels = len(ds) if self.plate: - label_group = root.require_group(self.plate_path) + image_group = root.require_group(self.plate_path) else: - label_group = root + image_group = root _mask_shape: List[int] = list(self.image_shape) mask_shape: Tuple[int, ...] = tuple(_mask_shape) @@ -346,8 +352,6 @@ def save(self, masks: List[omero.model.Shape], name: str) -> None: ignored_dimensions, ) - axes = marshal_axes(self.image) - # For v0.3+ ngff we want to reduce the number of dimensions to # match the dims of the Image. dims_to_squeeze = [] @@ -356,13 +360,34 @@ def save(self, masks: List[omero.model.Shape], name: str) -> None: dims_to_squeeze.append(dim) labels = np.squeeze(labels, axis=tuple(dims_to_squeeze)) - scaler = Scaler(max_layer=input_pyramid_levels) - label_pyramid = scaler.nearest(labels) - transformations = marshal_transformations(self.image, levels=len(label_pyramid)) + # ngff-zarr (doesn't support "labels" directly...) + # we create the labels group etc... + labels_group = image_group.require_group("labels") + labels_group.attrs["labels"] = [name] + # and write the image there... + # we only downscale in X and Y + # scale_factors needs to include all "spatial" dimensions + # NB: ngff-zarr does NOT scale below the chunk size. 
Problem if we + # use 1024 chunks but want to scale down to thumbnail size + scale_factors = [ + {"x": 2**n, "y": 2**n, "z": 1} for n in range(1, input_pyramid_levels) + ] + print(f"scale_factors {scale_factors}") + print("labels", labels) + + # FIXME: specify axes info + multiscale_labels = nz.to_multiscales( + labels, + scale_factors=scale_factors, + chunks=64, + method=nz.Methods.ITKWASM_LABEL_IMAGE, + ) + labels_path = os.path.join(image_path, "labels", name) + nz.to_ngff_zarr(labels_path, multiscale_labels, version="0.4") - # Specify and store metadata - image_label_colors: List[JSONDict] = [] - label_properties: List[JSONDict] = [] + # Specify and store image-label metadata + image_label_colors: List[dict[str, Any]] = [] + label_properties: List[dict[str, Any]] = [] image_label = { "colors": image_label_colors, "properties": label_properties, @@ -377,15 +402,8 @@ def save(self, masks: List[omero.model.Shape], name: str) -> None: image_label_colors.append( {"label-value": label_value, "rgba": int_to_rgba_255(rgba_int)} ) - - write_multiscale_labels( - label_pyramid, - label_group, - name, - axes=axes, - coordinate_transformations=transformations, - label_metadata=image_label, - ) + labels_image_group = labels_group.require_group(name) + labels_image_group.attrs["image-label"] = image_label def shape_to_binim_yx( self, shape: omero.model.Shape @@ -474,7 +492,7 @@ def masks_to_labels( mask_shape: Tuple[int, ...], ignored_dimensions: Optional[Set[str]] = None, check_overlaps: Optional[bool] = None, - ) -> Tuple[np.ndarray, Dict[int, str], Dict[int, Dict]]: + ) -> Tuple[np.ndarray, Dict[int, int], Dict[int, Dict]]: """ :param masks [MaskI]: Iterable container of OMERO masks :param mask_shape 5-tuple: the image dimensions (T, C, Z, Y, X), taking @@ -534,7 +552,7 @@ def masks_to_labels( labels.shape == mask_shape ), f"Invalid label shape: {labels.shape}, expected {mask_shape}" - fillColors: Dict[int, str] = {} + fillColors: Dict[int, int] = {} properties: Dict[int, 
Dict] = {} for count, shapes in enumerate(masks): diff --git a/src/omero_zarr/raw_pixels.py b/src/omero_zarr/raw_pixels.py index 40546094..61e4c5ff 100644 --- a/src/omero_zarr/raw_pixels.py +++ b/src/omero_zarr/raw_pixels.py @@ -20,18 +20,13 @@ import math import os import time -from typing import Any, Dict, List, Optional, Tuple +from collections import defaultdict +from typing import Any, Dict, List, Optional, Tuple, Union import dask.array as da import numpy as np import omero.clients # noqa import omero.gateway # required to allow 'from omero_zarr import raw_pixels' -from ome_zarr.dask_utils import resize as da_resize -from ome_zarr.writer import ( - write_multiscales_metadata, - write_plate_metadata, - write_well_metadata, -) from omero.model import Channel from omero.model.enums import ( PixelsTypedouble, @@ -49,6 +44,7 @@ from . import __version__ from . import ngff_version as VERSION from .util import marshal_axes, marshal_transformations, open_store, print_status +from .util import resize as da_resize def image_to_zarr(image: omero.gateway.ImageWrapper, args: argparse.Namespace) -> None: @@ -96,7 +92,16 @@ def add_image( for dataset, transform in zip(datasets, transformations): dataset["coordinateTransformations"] = transform - write_multiscales_metadata(parent, datasets, axes=axes) + # write_multiscales_metadata(parent, datasets, axes=axes) + multiscales = [ + { + "version": VERSION, + "datasets": datasets, + "name": image.name, + "axes": axes, + } + ] + parent.attrs["multiscales"] = multiscales return (level_count, axes) @@ -290,10 +295,20 @@ def plate_to_zarr(plate: omero.gateway._PlateWrapper, args: argparse.Namespace) # sort by row then column... 
wells = sorted(wells, key=lambda x: (x.row, x.column)) + well_list = [] + fields_by_acq_well: dict[int, dict] = defaultdict(lambda: defaultdict(set)) + for well in wells: row = plate.getRowLabels()[well.row] col = plate.getColumnLabels()[well.column] fields = [] + well_list.append( + { + "path": f"{row}/{col}", + "rowIndex": well.row, + "columnIndex": well.column, + } + ) for field in range(n_fields[0], n_fields[1] + 1): ws = well.getWellSample(field) if ws and ws.getImage(): @@ -305,6 +320,7 @@ def plate_to_zarr(plate: omero.gateway._PlateWrapper, args: argparse.Namespace) field_info = {"path": f"{field_name}"} if ac: field_info["acquisition"] = ac.id + fields_by_acq_well[ac.id][well.id].add(field) fields.append(field_info) row_group = root.require_group(row) col_group = row_group.require_group(col) @@ -312,20 +328,26 @@ def plate_to_zarr(plate: omero.gateway._PlateWrapper, args: argparse.Namespace) add_image(img, field_group) add_omero_metadata(field_group, img) # Update Well metadata after each image - write_well_metadata(col_group, fields) + # write_well_metadata(col_group, fields) + col_group.attrs["well"] = fields max_fields = max(max_fields, field + 1) print_status(int(t0), int(time.time()), count, total) # Update plate_metadata after each Well - write_plate_metadata( - root, - row_names, - col_names, - wells=list(well_paths), - field_count=max_fields, - acquisitions=plate_acq, - name=plate.name, - ) + plate_data: dict[str, Union[str, int, list[dict]]] = { + "columns": [{"name": str(col)} for col in col_names], + "rows": [{"name": str(row)} for row in row_names], + "wells": well_list, + "version": VERSION, + "name": plate.name, + "field_count": max_fields, + } + if plate_acq is not None: + for acq in plate_acq: + fcounts = [len(f) for f in fields_by_acq_well[acq["id"]].values()] + acq["maximumfieldcount"] = max(fcounts) + plate_data["acquisitions"] = plate_acq + root.attrs["plate"] = plate_data add_toplevel_metadata(root) print("Finished.") diff --git 
a/src/omero_zarr/util.py b/src/omero_zarr/util.py index 0a4e3829..0d945b50 100644 --- a/src/omero_zarr/util.py +++ b/src/omero_zarr/util.py @@ -17,8 +17,11 @@ # along with this program. If not, see . import time -from typing import Dict, List +from typing import Any, Dict, List +import dask.array as da +import numpy as np +import skimage.transform from omero.gateway import ImageWrapper from zarr.storage import FSStore @@ -128,3 +131,67 @@ def marshal_transformations( zooms["y"] = zooms["y"] * multiscales_zoom return transformations + + +def resize( + image: da.Array, output_shape: tuple[int, ...], *args: Any, **kwargs: Any +) -> da.Array: + r""" + Wrapped copy of "skimage.transform.resize" + Resize image to match a certain size. + :type image: :class:`dask.array` + :param image: The dask array to resize + :type output_shape: tuple + :param output_shape: The shape of the resized array + :type \*args: list + :param \*args: Arguments of skimage.transform.resize + :type \*\*kwargs: dict + :param \*\*kwargs: Keyword arguments of skimage.transform.resize + :return: Resized image. + """ + factors = np.array(output_shape) / np.array(image.shape).astype(float) + # Rechunk the input blocks so that the factors achieve an output + # blocks size of full numbers. + better_chunksize = tuple( + np.maximum(1, np.round(np.array(image.chunksize) * factors) / factors).astype( + int + ) + ) + image_prepared = image.rechunk(better_chunksize) + + # E.g. if we resize an image from 6675 by 0.5 to 3337, factor is 0.49992509 so each + # chunk of size e.g. 1000 will resize to 499. When assembled into a new array, the + # array will now be of size 3331 instead of 3337 because each of 6 chunks was + # smaller by 1. When we compute() this, dask will read 6 chunks of 1000 and expect + # last chunk to be 337 but instead it will only be 331. 
+ # So we use ceil() here (and in resize_block) to round 499.925 up to chunk of 500 + block_output_shape = tuple( + np.ceil(np.array(better_chunksize) * factors).astype(int) + ) + + # Map overlap + def resize_block(image_block: da.Array, block_info: dict) -> da.Array: + # if the input block is smaller than a 'regular' chunk (e.g. edge of image) + # we need to calculate target size for each chunk... + chunk_output_shape = tuple( + np.ceil(np.array(image_block.shape) * factors).astype(int) + ) + return skimage.transform.resize( + image_block, chunk_output_shape, *args, **kwargs + ).astype(image_block.dtype) + + output_slices = tuple(slice(0, d) for d in output_shape) + output = da.map_blocks( + resize_block, image_prepared, dtype=image.dtype, chunks=block_output_shape + )[output_slices] + return output.rechunk(image.chunksize).astype(image.dtype) + + +def int_to_rgba_255(v: int) -> list[int]: + """Get rgba (0-255) from integer. + >>> print(int_to_rgba_255(0)) + [0, 0, 0, 0] + >>> print([round(x, 3) for x in int_to_rgba_255(100100)]) + [0, 1, 135, 4] + """ + return list(v.to_bytes(4, signed=True, byteorder="big")) diff --git a/test/integration/clitest/test_export.py b/test/integration/clitest/test_export.py index 419967f6..4e3fe311 100644 --- a/test/integration/clitest/test_export.py +++ b/test/integration/clitest/test_export.py @@ -159,8 +159,9 @@ def test_export_masks(self, capsys: pytest.CaptureFixture, tmp_path: Path) -> No labels_json = json.loads(labels_text) assert labels_json["image-label"]["colors"] == [{"label-value": 1, "rgba": red}] + path0 = labels_json["multiscales"][0]["datasets"][0]["path"] arr_text = ( - tmp_path / f"{img_id}.zarr" / "labels" / "0" / "0" / ".zarray" + tmp_path / f"{img_id}.zarr" / "labels" / "0" / path0 / ".zarray" ).read_text(encoding="utf-8") arr_json = json.loads(arr_text) assert arr_json["shape"] == [1, 512, 512]