3 changes: 2 additions & 1 deletion config/config.yml
@@ -1,6 +1,7 @@
 samples: 'config/samples.tsv'
 
-root: '/cifs/khan_new/datasets/MIND/mouse_appmaptapoe' # can use a s3:// or gcs:// prefix to write output to cloud storage
+root: ''
+#cifs/khan_new/datasets/MIND/mouse_appmaptapoe' # can use a s3:// or gcs:// prefix to write output to cloud storage
 work: ""
 
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
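Since the comment on `root` says an `s3://` or `gcs://` prefix redirects output to cloud storage, consuming code presumably branches on the URI scheme. A minimal sketch of that convention; the `is_remote` helper is hypothetical, not from this repo:

```python
import yaml
from urllib.parse import urlparse

# Hypothetical helper, only illustrating the s3://-or-gcs://-prefix convention
# described in the config comment.
def is_remote(path: str) -> bool:
    """Return True if the path targets cloud storage."""
    return urlparse(path).scheme in ("s3", "gcs")

with open("config/config.yml") as f:
    config = yaml.safe_load(f)

print(is_remote(config["root"]))  # False for '' or a local filesystem path
```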
12 changes: 2 additions & 10 deletions config/samples.tsv
@@ -1,11 +1,3 @@
 subject sample acq stain_0 stain_1 stain_2 sample_path
-F6A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240801_13_F6_A2Te3_68_B_62_4x1_14-47-34
-F4A1Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240802_1_F4_1e3_66_A_61_4x1_14-39-52
-M1A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240803_18_M1_A2Te3_73_G_62_4x1_00-10-57
-M4A1Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240828_8_M1_A1Te3_67_H_61_4x1_15-07-06
-F1A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240829_14_F1_A2Te3_68_c_62_4x1_01-43-28
-F2A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240903_15_F2_A2Te3_68_E_62_4x1_14-23-04
-M3A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240904_19_M3_A2Te3_73_I_62_4x1_08-55-39
-F1A1Te4 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240904_4_F1_A1Te4_70_D_63_4x1_23-10-52
-M4A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240906_20_M4_A2Te3_73_J_62_4x1_12-53-47
-M7A1Te4 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240907_16_M7_A1Te4_71_F_63_4x1_21-07-03
+AS134F3 brain imaris4x Iba1 Abeta YoPro /nfs/trident3/lightsheet/prado/mouse_app_lecanemab_ki3/raw/ims_4x_stitched/A_AS134F3/10-59-07_a- AS134F3 IK3 PBS IBA1 647 ab RedX YoPro 4X1_Blaze_C00.ome.ims
+AS135M5 brain imaris4x Iba1 Abeta YoPro /nfs/trident3/lightsheet/prado/mouse_app_lecanemab_ki3/raw/ims_4x_stitched/A_AS134F3/10-59-07_a- AS134F3 IK3 PBS IBA1 647 ab RedX YoPro 4X1_Blaze_C00.ome.ims
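The two new imaris4x rows have `sample_path` values containing spaces (they point at `.ome.ims` files), so anything reading this manifest must split strictly on tabs. A minimal sketch of loading it; the pandas call is illustrative, the actual workflow may use its own reader:

```python
import pandas as pd

# sample_path values for the imaris4x acquisitions contain spaces,
# so parse strictly on tabs rather than generic whitespace.
samples = pd.read_csv("config/samples.tsv", sep="\t", dtype=str)

# Stain columns follow the header shown above (stain_0, stain_1, stain_2).
stains = samples.loc[0, ["stain_0", "stain_1", "stain_2"]].tolist()
print(stains)
```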
336 changes: 316 additions & 20 deletions pixi.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pixi.toml
@@ -58,5 +58,5 @@ features = ["runtime", "dev"]
 features = ["dev"]
 
 [pypi-dependencies]
-ngff-zarr = { version = ">=0.13.2, <0.14", extras = ["all"] }
-zarrnii = "==0.3.0-a1"
+ngff-zarr = { version = ">=0.18.1,<0.19", extras = ["all"] }
+zarrnii = ">=0.8.0a1,<0.9.0"
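The zarrnii constraint moves from an exact pre-release pin to a range. Under PEP 440, naming a pre-release in a specifier is what allows pre-releases to match the range; a quick check with the `packaging` library (illustrative, not part of this repo):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=0.8.0a1,<0.9.0")

# Because the lower bound itself names a pre-release, the set admits them.
print(Version("0.8.0a1") in spec)  # True
print(Version("0.8.1") in spec)    # True
print(Version("0.9.0") in spec)    # False
```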
2 changes: 0 additions & 2 deletions submit_jobs.py
@@ -46,8 +46,6 @@ def submit_job(
         slurm_config["time"],
         "--mem",
         slurm_config["mem"],
-        "--nodelist",
-        "rri-cbs-h2.schulich.uwo.ca",
         "--cpus-per-task",
         slurm_config["cpus"],
         "--tmp",
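Dropping the hard-coded `--nodelist` pin lets the scheduler place jobs on any eligible node. A rough sketch of how such a command list is typically assembled; the dict values and job script name are invented, only the flags mirror the diff:

```python
import subprocess

slurm_config = {"time": "4:00:00", "mem": "32G", "cpus": "8"}  # illustrative values

cmd = [
    "sbatch",
    "--time", slurm_config["time"],
    "--mem", slurm_config["mem"],
    # no --nodelist: scheduling is no longer pinned to one host
    "--cpus-per-task", slurm_config["cpus"],
    "my_job.sh",  # hypothetical job script
]
print(" ".join(cmd))  # or: subprocess.run(cmd, check=True)
```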
2 changes: 1 addition & 1 deletion workflow/rules/common.smk
@@ -109,7 +109,7 @@ def get_all_targets():
                 datatype="micr",
                 sample="{sample}",
                 acq="{acq}",
-                res="{level}x",
+                level="{level}",
                 stain="{stain}",
                 suffix="SPIM.nii",
                 expand_kwargs=dict(
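The rename from `res="{level}x"` to `level="{level}"` changes the filename entity on every output. Assuming the `bids()` helper here is snakebids' (the printed path is what I would expect, not verified against this repo):

```python
from snakebids import bids  # assumption: the workflow's bids() is snakebids'

path = bids(
    root="resampled",
    subject="{subject}",
    datatype="micr",
    sample="{sample}",
    acq="{acq}",
    level="{level}",  # was res="{level}x", i.e. _res-2x_ becomes _level-2_
    stain="{stain}",
    suffix="SPIM.nii",
)
# Expected to resemble:
# resampled/sub-{subject}/micr/sub-{subject}_sample-{sample}_acq-{acq}_level-{level}_stain-{stain}_SPIM.nii
print(path)
```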
11 changes: 4 additions & 7 deletions workflow/rules/ome_zarr.smk
@@ -90,20 +90,17 @@ rule tif_stacks_to_ome_zarr:
 
 rule ome_zarr_to_nii:
     input:
-        **get_storage_creds(),
         zarr=get_input_ome_zarr_to_nii,
     params:
        channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
-        uri=get_output_ome_zarr_uri(),
-        storage_provider_settings=workflow.storage_provider_settings,
+        zarrnii_kwargs={},
     output:
         nii=bids(
             root=resampled,
             subject="{subject}",
             datatype="micr",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="SPIM.nii",
         ),
@@ -114,7 +111,7 @@ rule ome_zarr_to_nii:
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="benchmark.tsv",
         )
@@ -125,7 +122,7 @@ rule ome_zarr_to_nii:
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="log.txt",
         ),
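With `get_storage_creds()` and the storage-provider params gone, the rule evidently hands the OME-Zarr straight to zarrnii (note the new `zarrnii_kwargs` param). For context, here is a rough sketch of the underlying conversion using plain zarr and nibabel rather than the zarrnii API; the paths, the (c, z, y, x) pyramid layout, and the identity affine are illustrative assumptions:

```python
import nibabel as nib
import numpy as np
import zarr

def ome_zarr_level_to_nii(zarr_path, level, channel_index, out_nii):
    """Extract one channel at one pyramid level and save it as NIfTI."""
    grp = zarr.open_group(zarr_path, mode="r")
    # OME-Zarr multiscales store levels as arrays named "0", "1", ...;
    # axes assumed (c, z, y, x) here.
    data = np.asarray(grp[str(level)][channel_index])
    # Placeholder affine; a real conversion derives spacing from multiscale metadata.
    nib.save(nib.Nifti1Image(data.astype(np.float32), np.eye(4)), out_nii)

ome_zarr_level_to_nii("sub-01_SPIM.ome.zarr", level=2, channel_index=0,
                      out_nii="out_SPIM.nii")
```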
45 changes: 40 additions & 5 deletions workflow/scripts/imaris_channel_to_zarr.py
@@ -9,14 +9,49 @@
 store = zarr.ZipStore(snakemake.output.zarr, dimension_separator="/", mode="x")
 dest = zarr.group(store)
 
-zarr.copy(
+
+def print_tree_item(name, obj):
+    """
+    A visitor function to print the name and type of HDF5 objects.
+    """
+    # Indentation based on group depth
+    shift = name.count("/") * "  "
+    if isinstance(obj, h5py.Dataset):
+        print(f"{shift}Dataset: {name} (shape: {obj.shape}, dtype: {obj.dtype})")
+    elif isinstance(obj, h5py.Group):
+        print(f"{shift}Group: {name}/")
+    else:
+        print(f"{shift}Other: {name}")
+
+    # Optionally, print attributes as well
+    for key, val in obj.attrs.items():
+        print(f"{shift}  Attribute: {key}: {val}")
+
+
+print(
+    source[
+        "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
+            chan=snakemake.params.channel
+        )
+    ].shape
+)
+source.visititems(print_tree_item)
+print(
     source[
         "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
             chan=snakemake.params.channel
         )
-    ],
-    dest,
-    log=stdout,
-    compressor=None,
+    ].shape
 )
+
+# zarr.copy(
+#     source[
+#         "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
+#             chan=snakemake.params.channel
+#         )
+#     ],
+#     dest,
+#     log=stdout,
+#     compressor=None,
+# )
 source.close()
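The `visititems`-based tree dump added above is standard h5py. A self-contained illustration against a throwaway file; the file name, group layout, and array contents are invented for the demo:

```python
import h5py
import numpy as np

# Build a throwaway HDF5 file so the visitor has something to walk.
with h5py.File("demo.h5", "w") as f:
    f.create_dataset(
        "DataSet/ResolutionLevel 0/TimePoint 0/Channel 0/Data",
        data=np.zeros((4, 8, 8), dtype=np.uint16),
    )

def print_tree_item(name, obj):
    """Visitor: h5py calls this once per group/dataset with its full path."""
    kind = "Dataset" if isinstance(obj, h5py.Dataset) else "Group"
    print(f"{name.count('/') * '  '}{kind}: {name}")

with h5py.File("demo.h5", "r") as f:
    f.visititems(print_tree_item)  # prints each group, then the Data dataset
```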
211 changes: 180 additions & 31 deletions workflow/scripts/imaris_to_metadata.py
@@ -1,40 +1,189 @@
+import html
 import json
+import re
 
 import h5py
 import xmltodict
 
-with h5py.File(snakemake.input.ims, "r") as hdf5_file:
-    xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
+# -----------------------------
+# Helper functions
+# -----------------------------
 
 
-# Convert byte array to string and then to a dictionary
-xml_str = bytes(xml_data).decode(
-    "utf-8", errors="ignore"
-)  # Decode byte array to string
+def parse_xml_str(xml_str):
+    """Convert XML string to dict, decoding HTML entities (e.g. &#181; → µ)."""
+
+    def decode_entities(obj):
+        """Recursively decode HTML entities in all strings."""
+        if isinstance(obj, dict):
+            return {k: decode_entities(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [decode_entities(i) for i in obj]
+        elif isinstance(obj, str):
+            return html.unescape(obj)
+        else:
+            return obj
+
+    try:
+        parsed = xmltodict.parse(xml_str, namespace_separator=":")
+        return decode_entities(parsed)
+    except Exception as e:
+        print(f"Error parsing XML: {e}")
+        return None
+
+
+def parse_xml_bytes(xml_bytes):
+    """Convert raw byte array to parsed XML dict with namespace separator."""
+    xml_str = bytes(xml_bytes).decode("utf-8", errors="ignore")
+    try:
+        return xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=":")
+    except Exception as e:
+        print(f"Error parsing XML: {e}")
+        return None
+
+
+def clean_keys(obj):
+    """Recursively remove '@' from dict keys."""
+    if isinstance(obj, dict):
+        return {k.replace("@", ""): clean_keys(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_keys(i) for i in obj]
+    return obj
+
+
+def build_bids_metadata(custom_attrs):
+    """Convert custom attribute dict into BIDS microscopy-compliant metadata."""
+    # --- PixelSize and Units ---
+    px_x = float(custom_attrs["DataAxis0"]["@PhysicalUnit"])
+    px_y = float(custom_attrs["DataAxis1"]["@PhysicalUnit"])
+    px_z = abs(float(custom_attrs["DataAxis2"]["@PhysicalUnit"]))
+    pixel_size = [px_x, px_y, px_z]
+
+    unit_label = custom_attrs.get("AxesLabels", {}).get("@FirstAxis-Unit", "µm")
+    unit_label = unit_label.replace("µ", "u") if "µ" in unit_label else unit_label
+
+    # --- Extract Magnification (if present) ---
+    obj_id = custom_attrs.get("ObjectiveID", {}).get("@ObjectiveID", "")
+    magnification_match = re.search(r"(\d+(?:\.\d+)?)x", obj_id)
+    magnification = float(magnification_match.group(1)) if magnification_match else None
+
+    # --- Build BIDS JSON dict ---
+    bids_json = {
+        "PixelSize": pixel_size,
+        "PixelSizeUnits": unit_label,
+        "Immersion": custom_attrs.get("ObjectiveMedium", {}).get("@ObjectiveMedium"),
+        "NumericalAperture": float(
+            custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", 0.0)
+        ),
+        "Magnification": magnification,
+        "OtherAcquisitionParameters": custom_attrs.get("MeasurementMode", {}).get(
+            "@MeasurementMode"
+        ),
+        "InstrumentModel": custom_attrs.get("InstrumentMode", {}).get(
+            "@InstrumentMode"
+        ),
+        "SoftwareVersions": custom_attrs.get("ImspectorVersion", {}).get(
+            "@ImspectorVersion"
+        ),
+    }
+
+    # --- Collect non-BIDS fields into ExtraMetadata ---
+    excluded = {
+        "ObjectiveMedium",
+        "ObjectiveNA",
+        "ObjectiveID",
+        "MeasurementMode",
+        "InstrumentMode",
+        "ImspectorVersion",
+        "DataAxis0",
+        "DataAxis1",
+        "DataAxis2",
+        "AxesLabels",
+    }
+    extra_metadata = {
+        k: clean_keys(v) for k, v in custom_attrs.items() if k not in excluded
+    }
+    bids_json["ExtraMetadata"] = extra_metadata
+
+    return bids_json
+
+
+# -----------------------------
+# Main extraction
+# -----------------------------
+
+
+def print_h5_keys_recursive(obj, indent=""):
+    """
+    Recursively prints the keys and their hierarchy within an h5py object.
+
+    Args:
+        obj: An h5py.File or h5py.Group object.
+        indent: A string representing the current indentation level for printing.
+    """
+    if isinstance(obj, h5py.File) or isinstance(obj, h5py.Group):
+        for key, value in obj.items():
+            print(f"{indent}- {key}")
+            if isinstance(value, h5py.Group):
+                print_h5_keys_recursive(value, indent + "  ")
+    elif isinstance(obj, h5py.Dataset):
+        # Datasets are leaf nodes; their names are already printed by the parent group
+        pass
+    else:
+        print(f"{indent}- (Unknown HDF5 object type: {type(obj)})")
+
+
 try:
-    xml_dict = xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=":")
-except Exception as e:
-    print(f"Error parsing XML: {e}")
-
-
-metadata = {}
-metadata["physical_size_x"] = float(
-    xml_dict["root"]["ca:CustomAttributes"]["DataAxis0"]["@PhysicalUnit"]
-)
-metadata["physical_size_y"] = float(
-    xml_dict["root"]["ca:CustomAttributes"]["DataAxis1"]["@PhysicalUnit"]
-)
-metadata["physical_size_z"] = abs(
-    float(xml_dict["root"]["ca:CustomAttributes"]["DataAxis3"]["@PhysicalUnit"])
-)
-metadata["PixelSize"] = [
-    metadata["physical_size_z"] / 1000.0,
-    metadata["physical_size_y"] / 1000.0,
-    metadata["physical_size_x"] / 1000.0,
-]  # zyx since OME-Zarr is ZYX
-metadata["PixelSizeUnits"] = "mm"
-
-# write metadata to json
-with open(snakemake.output.metadata_json, "w") as fp:
-    json.dump(metadata, fp, indent=4)
+    with h5py.File(snakemake.input.ims, "r") as hdf5_file:
+        # print(snakemake.input.ims)
+        # print_h5_keys_recursive(hdf5_file)
+        xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
+        xml_dict = parse_xml_bytes(xml_data)
+        custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
+except:
+    print(
+        "Warning: cannot find OME metadata from imaris file, searching for tif file using standard folder structure"
+    )
+
+    from glob import glob
+    from pathlib import Path
+
+    import tifffile
+
+    p = Path(snakemake.input.ims)
+
+    # 1) Name of the parent folder
+    subj = p.parent.name  # -> 'B_AS161F3'
+
+    # 2) Root path of the 'raw' folder (3 levels up from the file)
+    raw_root = p.parents[2]  # -> '/nfs/trident3/.../raw'
+
+    # 3) Construct the new path
+    new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif")
+    try:
+        tif_file = sorted(glob(new_path))[0]
+        with tifffile.TiffFile(tif_file) as tif:
+            xml_data = tif.ome_metadata
+        xml_dict = parse_xml_str(xml_data)
+        custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"]
+    except:
+        raise ValueError(
+            "Cannot find OME metadata in imaris file or in associated tif files from standard folder structure"
+        )
+
+
+if not xml_dict:
+    raise ValueError("Failed to parse XML from .ims file")
+
+print(custom_attrs)
+bids_metadata = build_bids_metadata(custom_attrs)
+
+# -----------------------------
+# Write to JSON
+# -----------------------------
+with open(snakemake.output.metadata_json, "w", encoding="utf-8") as fp:
+    json.dump(bids_metadata, fp, indent=4, ensure_ascii=False)
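For readers unfamiliar with xmltodict's conventions, a tiny self-contained demo of the parsing these helpers rely on; the XML snippet and its values are invented:

```python
import xmltodict

# Invented minimal XML mimicking the Imaris/Imspector CustomAttributes layout.
xml_str = """<root><ca:CustomAttributes>
<DataAxis0 PhysicalUnit="1.8"/><DataAxis1 PhysicalUnit="1.8"/>
<DataAxis2 PhysicalUnit="-4.0"/>
<ObjectiveID ObjectiveID="MI PLAN 4x/0.35"/>
</ca:CustomAttributes></root>"""

attrs = xmltodict.parse(xml_str)["root"]["ca:CustomAttributes"]

# xmltodict prefixes XML attributes with '@', which is why the script
# strips '@' from keys before writing ExtraMetadata.
print(attrs["DataAxis0"]["@PhysicalUnit"])              # '1.8'
print(abs(float(attrs["DataAxis2"]["@PhysicalUnit"])))  # 4.0 (z spacing sign dropped)
```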