3 changes: 2 additions & 1 deletion config/config.yml
@@ -1,6 +1,7 @@
 samples: 'config/samples.tsv'
 
-root: '/cifs/khan_new/datasets/MIND/mouse_appmaptapoe' # can use a s3:// or gcs:// prefix to write output to cloud storage
+root: ''
+#cifs/khan_new/datasets/MIND/mouse_appmaptapoe' # can use a s3:// or gcs:// prefix to write output to cloud storage
 work: ""
 
 remote_creds: '~/.config/gcloud/application_default_credentials.json' #this is needed so we can pass creds to container
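Since the comment on `root` says an `s3://` or `gcs://` prefix redirects output to cloud storage, consuming code presumably branches on the URI scheme. A minimal sketch of that convention; the `is_remote` helper is hypothetical, not from this repo:

```python
import yaml
from urllib.parse import urlparse

# Hypothetical helper, only illustrating the s3://-or-gcs://-prefix convention
# described in the config comment.
def is_remote(path: str) -> bool:
    """Return True if the path targets cloud storage."""
    return urlparse(path).scheme in ("s3", "gcs")

with open("config/config.yml") as f:
    config = yaml.safe_load(f)

print(is_remote(config["root"]))  # False for '' or a local filesystem path
```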
12 changes: 2 additions & 10 deletions config/samples.tsv
@@ -1,11 +1,3 @@
 subject sample acq stain_0 stain_1 stain_2 sample_path
-F6A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240801_13_F6_A2Te3_68_B_62_4x1_14-47-34
-F4A1Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240802_1_F4_1e3_66_A_61_4x1_14-39-52
-M1A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240803_18_M1_A2Te3_73_G_62_4x1_00-10-57
-M4A1Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240828_8_M1_A1Te3_67_H_61_4x1_15-07-06
-F1A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240829_14_F1_A2Te3_68_c_62_4x1_01-43-28
-F2A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240903_15_F2_A2Te3_68_E_62_4x1_14-23-04
-M3A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240904_19_M3_A2Te3_73_I_62_4x1_08-55-39
-F1A1Te4 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240904_4_F1_A1Te4_70_D_63_4x1_23-10-52
-M4A2Te3 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240906_20_M4_A2Te3_73_J_62_4x1_12-53-47
-M7A1Te4 brain blaze Abeta PI Lectin /cifs/trident/projects/mouse_appmaptapoe/lightsheet/sourcedata/newBlaze/4x1/240907_16_M7_A1Te4_71_F_63_4x1_21-07-03
+AS134F3 brain imaris4x Iba1 Abeta YoPro /nfs/trident3/lightsheet/prado/mouse_app_lecanemab_ki3/raw/ims_4x_stitched/A_AS134F3/10-59-07_a- AS134F3 IK3 PBS IBA1 647 ab RedX YoPro 4X1_Blaze_C00.ome.ims
+AS135M5 brain imaris4x Iba1 Abeta YoPro /nfs/trident3/lightsheet/prado/mouse_app_lecanemab_ki3/raw/ims_4x_stitched/A_AS134F3/10-59-07_a- AS134F3 IK3 PBS IBA1 647 ab RedX YoPro 4X1_Blaze_C00.ome.ims
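The two new imaris4x rows have `sample_path` values containing spaces (they point at `.ome.ims` files), so anything reading this manifest must split strictly on tabs. A minimal sketch of loading it; the pandas call is illustrative, the actual workflow may use its own reader:

```python
import pandas as pd

# sample_path values for the imaris4x acquisitions contain spaces,
# so parse strictly on tabs rather than generic whitespace.
samples = pd.read_csv("config/samples.tsv", sep="\t", dtype=str)

# Stain columns follow the header shown above (stain_0, stain_1, stain_2).
stains = samples.loc[0, ["stain_0", "stain_1", "stain_2"]].tolist()
print(stains)
```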
336 changes: 316 additions & 20 deletions pixi.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pixi.toml
@@ -58,5 +58,5 @@ features = ["runtime", "dev"]
 features = ["dev"]
 
 [pypi-dependencies]
-ngff-zarr = { version = ">=0.13.2, <0.14", extras = ["all"] }
-zarrnii = "==0.3.0-a1"
+ngff-zarr = { version = ">=0.18.1,<0.19", extras = ["all"] }
+zarrnii = ">=0.8.0a1,<0.9.0"
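The zarrnii constraint moves from an exact pre-release pin to a range. Under PEP 440, naming a pre-release in a specifier is what allows pre-releases to match the range; a quick check with the `packaging` library (illustrative, not part of this repo):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=0.8.0a1,<0.9.0")

# Because the lower bound itself names a pre-release, the set admits them.
print(Version("0.8.0a1") in spec)  # True
print(Version("0.8.1") in spec)    # True
print(Version("0.9.0") in spec)    # False
```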
2 changes: 0 additions & 2 deletions submit_jobs.py
@@ -46,8 +46,6 @@ def submit_job(
         slurm_config["time"],
         "--mem",
         slurm_config["mem"],
-        "--nodelist",
-        "rri-cbs-h2.schulich.uwo.ca",
         "--cpus-per-task",
         slurm_config["cpus"],
         "--tmp",
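Dropping the hard-coded `--nodelist` pin lets the scheduler place jobs on any eligible node. A rough sketch of how such a command list is typically assembled; the dict values and job script name are invented, only the flags mirror the diff:

```python
import subprocess

slurm_config = {"time": "4:00:00", "mem": "32G", "cpus": "8"}  # illustrative values

cmd = [
    "sbatch",
    "--time", slurm_config["time"],
    "--mem", slurm_config["mem"],
    # no --nodelist: scheduling is no longer pinned to one host
    "--cpus-per-task", slurm_config["cpus"],
    "my_job.sh",  # hypothetical job script
]
print(" ".join(cmd))  # or: subprocess.run(cmd, check=True)
```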
2 changes: 1 addition & 1 deletion workflow/rules/common.smk
@@ -109,7 +109,7 @@ def get_all_targets():
                 datatype="micr",
                 sample="{sample}",
                 acq="{acq}",
-                res="{level}x",
+                level="{level}",
                 stain="{stain}",
                 suffix="SPIM.nii",
                 expand_kwargs=dict(
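The rename from `res="{level}x"` to `level="{level}"` changes the filename entity on every output. Assuming the `bids()` helper here is snakebids' (the printed path is what I would expect, not verified against this repo):

```python
from snakebids import bids  # assumption: the workflow's bids() is snakebids'

path = bids(
    root="resampled",
    subject="{subject}",
    datatype="micr",
    sample="{sample}",
    acq="{acq}",
    level="{level}",  # was res="{level}x", i.e. _res-2x_ becomes _level-2_
    stain="{stain}",
    suffix="SPIM.nii",
)
# Expected to resemble:
# resampled/sub-{subject}/micr/sub-{subject}_sample-{sample}_acq-{acq}_level-{level}_stain-{stain}_SPIM.nii
print(path)
```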
11 changes: 4 additions & 7 deletions workflow/rules/ome_zarr.smk
@@ -90,20 +90,17 @@ rule tif_stacks_to_ome_zarr:
 
 rule ome_zarr_to_nii:
     input:
-        **get_storage_creds(),
         zarr=get_input_ome_zarr_to_nii,
     params:
        channel_index=lambda wildcards: get_stains(wildcards).index(wildcards.stain),
-        uri=get_output_ome_zarr_uri(),
-        storage_provider_settings=workflow.storage_provider_settings,
+        zarrnii_kwargs={},
     output:
         nii=bids(
             root=resampled,
             subject="{subject}",
             datatype="micr",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="SPIM.nii",
         ),
@@ -114,7 +111,7 @@ rule ome_zarr_to_nii:
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="benchmark.tsv",
         )
@@ -125,7 +122,7 @@ rule ome_zarr_to_nii:
             subject="{subject}",
             sample="{sample}",
             acq="{acq}",
-            res="{level}x",
+            level="{level}",
             stain="{stain}",
             suffix="log.txt",
         ),
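With `get_storage_creds()` and the storage-provider params gone, the rule evidently hands the OME-Zarr straight to zarrnii (note the new `zarrnii_kwargs` param). For context, here is a rough sketch of the underlying conversion using plain zarr and nibabel rather than the zarrnii API; the paths, the (c, z, y, x) pyramid layout, and the identity affine are illustrative assumptions:

```python
import nibabel as nib
import numpy as np
import zarr

def ome_zarr_level_to_nii(zarr_path, level, channel_index, out_nii):
    """Extract one channel at one pyramid level and save it as NIfTI."""
    grp = zarr.open_group(zarr_path, mode="r")
    # OME-Zarr multiscales store levels as arrays named "0", "1", ...;
    # axes assumed (c, z, y, x) here.
    data = np.asarray(grp[str(level)][channel_index])
    # Placeholder affine; a real conversion derives spacing from multiscale metadata.
    nib.save(nib.Nifti1Image(data.astype(np.float32), np.eye(4)), out_nii)

ome_zarr_level_to_nii("sub-01_SPIM.ome.zarr", level=2, channel_index=0,
                      out_nii="out_SPIM.nii")
```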
45 changes: 40 additions & 5 deletions workflow/scripts/imaris_channel_to_zarr.py
@@ -9,14 +9,49 @@
 store = zarr.ZipStore(snakemake.output.zarr, dimension_separator="/", mode="x")
 dest = zarr.group(store)
 
-zarr.copy(
+
+def print_tree_item(name, obj):
+    """
+    A visitor function to print the name and type of HDF5 objects.
+    """
+    # Indentation based on group depth
+    shift = name.count("/") * "  "
+    if isinstance(obj, h5py.Dataset):
+        print(f"{shift}Dataset: {name} (shape: {obj.shape}, dtype: {obj.dtype})")
+    elif isinstance(obj, h5py.Group):
+        print(f"{shift}Group: {name}/")
+    else:
+        print(f"{shift}Other: {name}")
+
+    # Optionally, print attributes as well
+    for key, val in obj.attrs.items():
+        print(f"{shift}  Attribute: {key}: {val}")
+
+
+print(
+    source[
+        "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
+            chan=snakemake.params.channel
+        )
+    ].shape
+)
+source.visititems(print_tree_item)
+print(
     source[
         "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
             chan=snakemake.params.channel
         )
-    ],
-    dest,
-    log=stdout,
-    compressor=None,
+    ].shape
 )
+
+# zarr.copy(
+#     source[
+#         "DataSet/ResolutionLevel 0/TimePoint 0/Channel {chan}/Data".format(
+#             chan=snakemake.params.channel
+#         )
+#     ],
+#     dest,
+#     log=stdout,
+#     compressor=None,
+# )
 source.close()
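The `visititems`-based tree dump added above is standard h5py. A self-contained illustration against a throwaway file; the file name, group layout, and array contents are invented for the demo:

```python
import h5py
import numpy as np

# Build a throwaway HDF5 file so the visitor has something to walk.
with h5py.File("demo.h5", "w") as f:
    f.create_dataset(
        "DataSet/ResolutionLevel 0/TimePoint 0/Channel 0/Data",
        data=np.zeros((4, 8, 8), dtype=np.uint16),
    )

def print_tree_item(name, obj):
    """Visitor: h5py calls this once per group/dataset with its full path."""
    kind = "Dataset" if isinstance(obj, h5py.Dataset) else "Group"
    print(f"{name.count('/') * '  '}{kind}: {name}")

with h5py.File("demo.h5", "r") as f:
    f.visititems(print_tree_item)  # prints each group, then the Data dataset
```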
211 changes: 180 additions & 31 deletions workflow/scripts/imaris_to_metadata.py
@@ -1,40 +1,189 @@
+import html
 import json
+import re
 
 import h5py
 import xmltodict
 
-with h5py.File(snakemake.input.ims, "r") as hdf5_file:
-    xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
+# -----------------------------
+# Helper functions
+# -----------------------------
 
 
-# Convert byte array to string and then to a dictionary
-xml_str = bytes(xml_data).decode(
-    "utf-8", errors="ignore"
-)  # Decode byte array to string
+def parse_xml_str(xml_str):
+    """Convert XML string to dict, decoding HTML entities (e.g. &#181; → µ)."""
+
+    def decode_entities(obj):
+        """Recursively decode HTML entities in all strings."""
+        if isinstance(obj, dict):
+            return {k: decode_entities(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [decode_entities(i) for i in obj]
+        elif isinstance(obj, str):
+            return html.unescape(obj)
+        else:
+            return obj
+
+    try:
+        parsed = xmltodict.parse(xml_str, namespace_separator=":")
+        return decode_entities(parsed)
+    except Exception as e:
+        print(f"Error parsing XML: {e}")
+        return None
+
+
+def parse_xml_bytes(xml_bytes):
+    """Convert raw byte array to parsed XML dict with namespace separator."""
+    xml_str = bytes(xml_bytes).decode("utf-8", errors="ignore")
+    try:
+        return xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=":")
+    except Exception as e:
+        print(f"Error parsing XML: {e}")
+        return None
+
+
+def clean_keys(obj):
+    """Recursively remove '@' from dict keys."""
+    if isinstance(obj, dict):
+        return {k.replace("@", ""): clean_keys(v) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [clean_keys(i) for i in obj]
+    return obj
+
+
+def build_bids_metadata(custom_attrs):
+    """Convert custom attribute dict into BIDS microscopy-compliant metadata."""
+    # --- PixelSize and Units ---
+    px_x = float(custom_attrs["DataAxis0"]["@PhysicalUnit"])
+    px_y = float(custom_attrs["DataAxis1"]["@PhysicalUnit"])
+    px_z = abs(float(custom_attrs["DataAxis2"]["@PhysicalUnit"]))
+    pixel_size = [px_x, px_y, px_z]
+
+    unit_label = custom_attrs.get("AxesLabels", {}).get("@FirstAxis-Unit", "µm")
+    unit_label = unit_label.replace("µ", "u") if "µ" in unit_label else unit_label
+
+    # --- Extract Magnification (if present) ---
+    obj_id = custom_attrs.get("ObjectiveID", {}).get("@ObjectiveID", "")
+    magnification_match = re.search(r"(\d+(?:\.\d+)?)x", obj_id)
+    magnification = float(magnification_match.group(1)) if magnification_match else None
+
+    # --- Build BIDS JSON dict ---
+    bids_json = {
+        "PixelSize": pixel_size,
+        "PixelSizeUnits": unit_label,
+        "Immersion": custom_attrs.get("ObjectiveMedium", {}).get("@ObjectiveMedium"),
+        "NumericalAperture": float(
+            custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", 0.0)
+        ),
+        "Magnification": magnification,
+        "OtherAcquisitionParameters": custom_attrs.get("MeasurementMode", {}).get(
+            "@MeasurementMode"
+        ),
+        "InstrumentModel": custom_attrs.get("InstrumentMode", {}).get(
+            "@InstrumentMode"
+        ),
+        "SoftwareVersions": custom_attrs.get("ImspectorVersion", {}).get(
+            "@ImspectorVersion"
+        ),
+    }
+
+    # --- Collect non-BIDS fields into ExtraMetadata ---
+    excluded = {
+        "ObjectiveMedium",
+        "ObjectiveNA",
+        "ObjectiveID",
+        "MeasurementMode",
+        "InstrumentMode",
+        "ImspectorVersion",
+        "DataAxis0",
+        "DataAxis1",
+        "DataAxis2",
+        "AxesLabels",
+    }
+    extra_metadata = {
+        k: clean_keys(v) for k, v in custom_attrs.items() if k not in excluded
+    }
+    bids_json["ExtraMetadata"] = extra_metadata
+
+    return bids_json
+
+
+# -----------------------------
+# Main extraction
+# -----------------------------
+
+
+def print_h5_keys_recursive(obj, indent=""):
+    """
+    Recursively prints the keys and their hierarchy within an h5py object.
+
+    Args:
+        obj: An h5py.File or h5py.Group object.
+        indent: A string representing the current indentation level for printing.
+    """
+    if isinstance(obj, h5py.File) or isinstance(obj, h5py.Group):
+        for key, value in obj.items():
+            print(f"{indent}- {key}")
+            if isinstance(value, h5py.Group):
+                print_h5_keys_recursive(value, indent + "  ")
+    elif isinstance(obj, h5py.Dataset):
+        # Datasets are leaf nodes; their names are already printed by the parent group
+        pass
+    else:
+        print(f"{indent}- (Unknown HDF5 object type: {type(obj)})")
+
+
 try:
-    xml_dict = xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=":")
-except Exception as e:
-    print(f"Error parsing XML: {e}")
-
-
-metadata = {}
-metadata["physical_size_x"] = float(
-    xml_dict["root"]["ca:CustomAttributes"]["DataAxis0"]["@PhysicalUnit"]
-)
-metadata["physical_size_y"] = float(
-    xml_dict["root"]["ca:CustomAttributes"]["DataAxis1"]["@PhysicalUnit"]
-)
-metadata["physical_size_z"] = abs(
-    float(xml_dict["root"]["ca:CustomAttributes"]["DataAxis3"]["@PhysicalUnit"])
-)
-metadata["PixelSize"] = [
-    metadata["physical_size_z"] / 1000.0,
-    metadata["physical_size_y"] / 1000.0,
-    metadata["physical_size_x"] / 1000.0,
-]  # zyx since OME-Zarr is ZYX
-metadata["PixelSizeUnits"] = "mm"
-
-# write metadata to json
-with open(snakemake.output.metadata_json, "w") as fp:
-    json.dump(metadata, fp, indent=4)
+    with h5py.File(snakemake.input.ims, "r") as hdf5_file:
+        # print(snakemake.input.ims)
+        # print_h5_keys_recursive(hdf5_file)
+        xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
+        xml_dict = parse_xml_bytes(xml_data)
+        custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
+except:
+    print(
+        "Warning: cannot find OME metadata from imaris file, searching for tif file using standard folder structure"
+    )
+
+    from glob import glob
+    from pathlib import Path
+
+    import tifffile
+
+    p = Path(snakemake.input.ims)
+
+    # 1) Name of the parent folder
+    subj = p.parent.name  # -> 'B_AS161F3'
+
+    # 2) Root path of the 'raw' folder (3 levels up from the file)
+    raw_root = p.parents[2]  # -> '/nfs/trident3/.../raw'
+
+    # 3) Construct the new path
+    new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif")
+    try:
+        tif_file = sorted(glob(new_path))[0]
+        with tifffile.TiffFile(tif_file) as tif:
+            xml_data = tif.ome_metadata
+        xml_dict = parse_xml_str(xml_data)
+        custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"]
+    except:
+        raise ValueError(
+            "Cannot find OME metadata in imaris file or in associated tif files from standard folder structure"
+        )
+
+
+if not xml_dict:
+    raise ValueError("Failed to parse XML from .ims file")
+
+print(custom_attrs)
+bids_metadata = build_bids_metadata(custom_attrs)
+
+# -----------------------------
+# Write to JSON
+# -----------------------------
+with open(snakemake.output.metadata_json, "w", encoding="utf-8") as fp:
+    json.dump(bids_metadata, fp, indent=4, ensure_ascii=False)
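For readers unfamiliar with xmltodict's conventions, a tiny self-contained demo of the parsing these helpers rely on; the XML snippet and its values are invented:

```python
import xmltodict

# Invented minimal XML mimicking the Imaris/Imspector CustomAttributes layout.
xml_str = """<root><ca:CustomAttributes>
<DataAxis0 PhysicalUnit="1.8"/><DataAxis1 PhysicalUnit="1.8"/>
<DataAxis2 PhysicalUnit="-4.0"/>
<ObjectiveID ObjectiveID="MI PLAN 4x/0.35"/>
</ca:CustomAttributes></root>"""

attrs = xmltodict.parse(xml_str)["root"]["ca:CustomAttributes"]

# xmltodict prefixes XML attributes with '@', which is why the script
# strips '@' from keys before writing ExtraMetadata.
print(attrs["DataAxis0"]["@PhysicalUnit"])              # '1.8'
print(abs(float(attrs["DataAxis2"]["@PhysicalUnit"])))  # 4.0 (z spacing sign dropped)
```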