From e4397eaf1438951deb736d513fb9f92503b4f0b6 Mon Sep 17 00:00:00 2001
From: Eduardo Hirata-Miyasaki <edhiratam@gmail.com>
Date: Mon, 13 Oct 2025 19:31:33 -0700
Subject: [PATCH 1/4] proof of concept for segmentation and extraction of
 features

---
 .../extract_features.py                       | 296 ++++++++++++++
 .../organelle_segmentation/segment_mito.py    | 381 ++++++++++++++++++
 .../segment_organelles.py                     | 295 ++++++++++++++
 3 files changed, 972 insertions(+)
 create mode 100644 applications/pseudotime_analysis/organelle_segmentation/extract_features.py
 create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
 create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py

diff --git a/applications/pseudotime_analysis/organelle_segmentation/extract_features.py b/applications/pseudotime_analysis/organelle_segmentation/extract_features.py
new file mode 100644
index 000000000..739bc1f2f
--- /dev/null
+++ b/applications/pseudotime_analysis/organelle_segmentation/extract_features.py
@@ -0,0 +1,296 @@
+import warnings as warning
+
+import numpy as np
+import pandas as pd
+from numpy.typing import ArrayLike
+from skimage import measure
+from skimage.feature import graycomatrix, graycoprops
+
+
+def extract_features_zyx(
+    labels_zyx: ArrayLike,
+    intensity_zyx: ArrayLike = None,
+    frangi_zyx: ArrayLike = None,
+    spacing: tuple = (1.0, 1.0),
+    properties: list = None,
+    extra_properties: list = None,
+):
+    """
+    Extract morphological and intensity features from labeled organelles.
+
+    Handles both 2D (Z=1) and 3D (Z>1) data automatically
+    For 2D data, processes the single Z-slice. For 3D data, performs max projection
+    along Z axis before feature extraction.
+
+    Based on:
+    Lefebvre, A.E.Y.T., Sturm, G., Lin, TY. et al.
+    Nellie (2025) https://doi.org/10.1038/s41592-025-02612-7
+
+    Parameters
+    ----------
+    labels_zyx : ndarray
+        Labeled segmentation mask with shape (Z, Y, X).
+        Each unique integer value represents a different organelle instance.
+    intensity_zyx : ndarray, optional
+        Original intensity image with shape (Z, Y, X) for computing
+        intensity-based features. If None, only morphological features computed.
+    frangi_image : ndarray, optional
+        Frangi vesselness response with shape (Z, Y, X) for computing
+        tubularity/filament features.
+    spacing : tuple
+        Physical spacing in same units (e.g., µm).
+        For 2D (Z=1): (pixel_size_y, pixel_size_x)
+        For 3D (Z>1): (pixel_size_z, pixel_size_y, pixel_size_x)
+    properties : list of str, optional
+        List of standard regionprops features to compute. If None, uses default set.
+        Available: 'label', 'area', 'perimeter', 'axis_major_length',
+        'axis_minor_length', 'solidity', 'extent', 'orientation',
+        'equivalent_diameter_area', 'convex_area', 'eccentricity',
+        'mean_intensity', 'min_intensity', 'max_intensity'
+    extra_properties : list of str, optional
+        Additional features beyond standard regionprops. Options:
+        - 'moments_hu': Hu moments (shape descriptors, 7 features)
+        - 'texture': Haralick texture features (4 features: contrast, homogeneity, energy, correlation)
+        - 'aspect_ratio': Major axis / minor axis ratio
+        - 'circularity':  area / perimeter
+        - 'frangi_intensity': Mean/min/max/sum/std of Frangi vesselness
+        - 'feret_diameter_max': Maximum Feret diameter (expensive)
+        - 'sum_intensity': Sum of intensity values
+        - 'std_intensity': Standard deviation of intensity values
+
+    Returns
+    -------
+    features_df : pd.DataFrame
+        DataFrame where each row represents one labeled object with columns
+        for each computed feature. Always includes 'label' and 'channel' columns.
+
+    Examples
+    --------
+    >>> # Basic morphology only
+    >>> df = extract_features_zyx(labels_zyx)
+
+    >>> # With intensity features
+    >>> df = extract_features_zyx(labels_zyx, intensity_zyx=intensity)
+
+    >>> # Custom property selection
+    >>> df = extract_features_zyx(
+    ...     labels_zyx,
+    ...     intensity_zyx=intensity,
+    ...     properties=['label', 'area', 'mean_intensity'],
+    ...     extra_properties=['aspect_ratio', 'circularity']
+    ... )
+
+    >>> # Full feature set including Frangi
+    >>> df = extract_features_zyx(
+    ...     labels_zyx,
+    ...     intensity_zyx=intensity,
+    ...     frangi_image=vesselness,
+    ...     extra_properties=['moments_hu', 'texture', 'frangi_intensity']
+    ... )
+    """
+
+    if intensity_zyx is not None:
+        assert (
+            intensity_zyx.shape == labels_zyx.shape
+        ), "Image and labels must have same shape"
+
+    Z, _, _ = labels_zyx.shape
+
+    # Default properties if not specified
+    if properties is None:
+        properties = [
+            "label",
+            "area",
+            "perimeter",
+            "axis_major_length",
+            "axis_minor_length",
+            "solidity",
+            "extent",
+            "orientation",
+            "equivalent_diameter_area",
+            "convex_area",
+            "eccentricity",
+        ]
+        # Add intensity features if image provided
+        if intensity_zyx is not None:
+            properties.extend(["mean_intensity", "min_intensity", "max_intensity"])
+
+    if extra_properties is None:
+        extra_properties = []
+
+    # Determine 2D vs 3D mode and set appropriate spacing
+    spacing_2d = spacing if len(spacing) == 2 else spacing[-2:]
+
+    if Z == 1:
+        # Squeeze Z dimension for 2D processing
+        labels_processed = labels_zyx[0]  # Shape: (Y, X)
+        intensity_processed = intensity_zyx[0] if intensity_zyx is not None else None
+        frangi_processed = frangi_zyx[0] if frangi_zyx is not None else None
+    else:
+        # Use max projection along Z for 3D -> 2D
+        labels_processed = np.max(labels_zyx, axis=0)  # Shape: (Y, X)
+        intensity_processed = (
+            np.max(intensity_zyx, axis=0) if intensity_zyx is not None else None
+        )
+        frangi_processed = (
+            np.max(frangi_zyx, axis=0) if frangi_zyx is not None else None
+        )
+
+    # Check if we have any objects to process
+    if labels_processed.max() == 0:
+        warning.warn(f"Warning: No objects found")
+
+    # Compute base regionprops features (those that support spacing)
+    props_with_spacing = [p for p in properties if p not in ["moments_hu"]]
+
+    try:
+        props_dict = measure.regionprops_table(
+            labels_processed,
+            intensity_image=intensity_processed,
+            properties=tuple(props_with_spacing),
+            spacing=spacing_2d,
+        )
+        df = pd.DataFrame(props_dict)
+    except Exception as e:
+        warning.warn(f"Error computing base regionprops: {e}")
+
+    # Add Hu moments separately (without spacing)
+    if "moments_hu" in properties or "moments_hu" in extra_properties:
+        try:
+            hu_props = measure.regionprops_table(
+                labels_processed, properties=("label", "moments_hu"), spacing=(1, 1)
+            )
+            hu_df = pd.DataFrame(hu_props)
+            # Rename columns to be clearer
+            hu_rename = {f"moments_hu-{i}": f"hu_moment_{i}" for i in range(7)}
+            hu_df = hu_df.rename(columns=hu_rename)
+            df = df.merge(hu_df, on="label", how="left")
+        except Exception as e:
+            warning.warn(f"Could not compute Hu moments: {e}")
+
+    # Add derived metrics
+    if "aspect_ratio" in extra_properties:
+        minor_axis = df["axis_minor_length"].replace(0, 1)  # Avoid division by zero
+        df["aspect_ratio"] = df["axis_major_length"] / minor_axis
+
+    if "circularity" in extra_properties:
+        perimeter_sq = df["perimeter"] ** 2
+        df["circularity"] = np.divide(
+            4 * np.pi * df["area"],
+            perimeter_sq,
+            out=np.ones_like(perimeter_sq),
+            where=perimeter_sq != 0,
+        )
+
+    # Add expensive/iterative features
+    if any(
+        prop in extra_properties
+        for prop in ["texture", "feret_diameter_max", "frangi_intensity"]
+    ):
+        regions = measure.regionprops(
+            labels_processed, intensity_image=intensity_processed
+        )
+        extra_features = []
+
+        for region in regions:
+            features = {"label": region.label}
+
+            # Haralick texture features
+            if "texture" in extra_properties and intensity_processed is not None:
+                min_r, min_c, max_r, max_c = region.bbox
+                region_intensity = (
+                    intensity_processed[min_r:max_r, min_c:max_c] * region.image
+                )
+
+                # Normalize to uint8
+                min_val, max_val = region_intensity.min(), region_intensity.max()
+                if max_val > min_val:
+                    region_uint8 = (
+                        (region_intensity - min_val) / (max_val - min_val) * 255
+                    ).astype(np.uint8)
+                else:
+                    region_uint8 = np.zeros_like(region_intensity, dtype=np.uint8)
+
+                try:
+                    glcm = graycomatrix(
+                        region_uint8,
+                        distances=[1],
+                        angles=[0],
+                        levels=256,
+                        symmetric=True,
+                        normed=True,
+                    )
+                    features["texture_contrast"] = graycoprops(glcm, "contrast")[0, 0]
+                    features["texture_homogeneity"] = graycoprops(glcm, "homogeneity")[
+                        0, 0
+                    ]
+                    features["texture_energy"] = graycoprops(glcm, "energy")[0, 0]
+                    features["texture_correlation"] = graycoprops(glcm, "correlation")[
+                        0, 0
+                    ]
+                except Exception:
+                    features["texture_contrast"] = np.nan
+                    features["texture_homogeneity"] = np.nan
+                    features["texture_energy"] = np.nan
+                    features["texture_correlation"] = np.nan
+
+            # Feret diameter
+            if "feret_diameter_max" in extra_properties:
+                features["feret_diameter_max"] = region.feret_diameter_max
+
+            # Frangi intensity features
+            if "frangi_intensity" in extra_properties and frangi_processed is not None:
+                min_r, min_c, max_r, max_c = region.bbox
+                region_frangi = frangi_processed[min_r:max_r, min_c:max_c][region.image]
+
+                if region_frangi.size > 0:
+                    features["frangi_mean_intensity"] = np.mean(region_frangi)
+                    features["frangi_min_intensity"] = np.min(region_frangi)
+                    features["frangi_max_intensity"] = np.max(region_frangi)
+                    features["frangi_sum_intensity"] = np.sum(region_frangi)
+                    features["frangi_std_intensity"] = np.std(region_frangi)
+                else:
+                    features["frangi_mean_intensity"] = np.nan
+                    features["frangi_min_intensity"] = np.nan
+                    features["frangi_max_intensity"] = np.nan
+                    features["frangi_sum_intensity"] = np.nan
+                    features["frangi_std_intensity"] = np.nan
+
+            extra_features.append(features)
+
+        if extra_features:
+            extra_df = pd.DataFrame(extra_features)
+            df = df.merge(extra_df, on="label", how="left")
+
+    # Add sum and std intensity if we have intensity image
+    if intensity_processed is not None and (
+        "sum_intensity" in extra_properties or "std_intensity" in extra_properties
+    ):
+        regions = measure.regionprops(
+            labels_processed, intensity_image=intensity_processed
+        )
+        sum_std_features = []
+
+        for region in regions:
+            min_r, min_c, max_r, max_c = region.bbox
+            region_pixels = intensity_processed[min_r:max_r, min_c:max_c][region.image]
+
+            features = {"label": region.label}
+            if region_pixels.size > 0:
+                if "sum_intensity" in extra_properties:
+                    features["sum_intensity"] = np.sum(region_pixels)
+                if "std_intensity" in extra_properties:
+                    features["std_intensity"] = np.std(region_pixels)
+            else:
+                if "sum_intensity" in extra_properties:
+                    features["sum_intensity"] = np.nan
+                if "std_intensity" in extra_properties:
+                    features["std_intensity"] = np.nan
+
+            sum_std_features.append(features)
+
+        if sum_std_features:
+            sum_std_df = pd.DataFrame(sum_std_features)
+            df = df.merge(sum_std_df, on="label", how="left")
+
+    return df
diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
new file mode 100644
index 000000000..bffe9d1c0
--- /dev/null
+++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
@@ -0,0 +1,381 @@
+# %% code for organelle and nuclear segmentation and feature extraction
+
+import os
+from pathlib import Path
+
+import napari
+import numpy as np
+import pandas as pd
+from extract_features import (
+    extract_features_zyx,
+)
+from iohub import open_ome_zarr
+from matplotlib import pyplot as plt
+from segment_organelles import (
+    calculate_nellie_sigmas,
+    segment_zyx,
+)
+from skimage.exposure import rescale_intensity
+from tqdm import tqdm
+
+os.environ["DISPLAY"] = ":1"
+viewer = napari.Viewer()
+# %%
+
+# input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr"
+input_path = Path(
+    "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/train-test/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV.zarr"
+)
+# input_path = "/hpc/projects/intracellular_dashboard/organelle_box/2025_04_04_organelle_box_Live_dye/4-concatenate/organelle_box_live_dye_TOMM20.zarr"
+input_zarr = open_ome_zarr(input_path, mode="r", layout="hcs")
+in_chans = input_zarr.channel_names
+organelle_channel_name = "GFP EX488 EM525-45"
+# Organelle_chan = "4-MultiCam_GFP_Cy5-BSI_Express"
+
+output_root = (
+    Path(
+        "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/infection_state/output"
+    )
+    / input_path.stem
+)
+output_root.mkdir(parents=True, exist_ok=True)
+
+# %%
+# Frangi parameters - WORKING configuration for 2D mitochondria segmentation
+frangi_params = {
+    "clahe_clip_limit": 0.01,  # Mild contrast enhancement (0.01-0.03 range)
+    "sigma_steps": 2,  # Multiple scales to capture size variation
+    "auto_optimize_sigma": False,  # Use multi-scale (max across scales)
+    "frangi_alpha": 0.5,  # Standard tubular structure sensitivity
+    "frangi_beta": 0.5,  # Standard blob rejection
+    "threshold_method": "nellie_max",  # CRITICAL: Manual threshold where auto methods fail
+    "min_object_size": 10,  # Remove small noise clusters (20-50 pixels)
+    "apply_morphology": False,  # Connect fragmented mitochondria
+}
+
+
+position_names = []
+for ds, position in input_zarr.positions():
+    position_names.append(tuple(ds.split("/")))
+
+
+for well_id, well_data in input_zarr.wells():
+    # print(well_id)
+    # well_id, well_data = next(input_zarr.wells())
+    well_name, well_no = well_id.split("/")
+
+    if "B/1" not in well_id:
+        continue
+    # if well_id == 'C/4':
+    # print(well_name, well_no)
+    for pos_id, pos_data in well_data.positions():
+        if pos_id != "000000":
+            continue
+        scale = (
+            pos_data.metadata.multiscales[0]
+            .datasets[0]
+            .coordinate_transformations[0]
+            .scale
+        )
+        pixel_size_um = scale[-1]  # XY pixel size in micrometers
+        z_spacing_um = scale[-3]  # Z spacing in micrometers
+        print(f"  Pixel size: {pixel_size_um:.4f} µm, Z spacing: {z_spacing_um:.4f} µm")
+
+        in_data = pos_data.data.numpy()
+        if in_data.shape[-3] != 1:
+            in_data = np.max(in_data, axis=-3, keepdims=True)
+        T, C, Z, Y, X = in_data.shape
+        print(f"Input data shape: {in_data.shape} (T={T}, C={C}, Z={Z}, Y={Y}, X={X})")
+
+        # Extract and normalize organelle channel (keep Z dimension)
+        organelle_data = in_data[
+            :, in_chans.index(organelle_channel_name), :
+        ]  # (T, Z, Y, X)
+        organelle_data = rescale_intensity(organelle_data, out_range=(0, 1))
+        print(f"Organelle data shape after extraction: {organelle_data.shape}")
+
+        # Calculate sigma range - ADJUSTED for your pixel size
+        # With 0.1494 µm/pixel, mitochondria (0.3-1.0 µm diameter) = 2-7 pixels diameter
+        # Sigma should be ~radius/2, so for diameter 2-7px, sigma = 0.5-1.75 px
+        min_radius_um = 0.15  # 300 nm diameter = ~2 pixels
+        max_radius_um = 0.6  # 1 µm diameter = ~6.7 pixels
+        sigma_range = calculate_nellie_sigmas(
+            min_radius_um,
+            max_radius_um,
+            pixel_size_um,
+            num_sigma=frangi_params["sigma_steps"],
+        )
+
+        print(f"Using sigma range: {sigma_range[0]:.2f} to {sigma_range[1]:.2f} pixels")
+
+        # Frangi filtering and segmentation
+        print(
+            f"Computing Frangi segmentation and feature extraction for {well_id}/{pos_id}..."
+        )
+        frangi_seg_masks = []
+        frangi_vesselness_maps = []
+        all_features = []
+
+        # FIXME: temporary for testing
+        selected_timepoints = np.linspace(0, T - 1, 3).astype(int)
+        for t in tqdm(selected_timepoints, desc="Processing timepoints"):
+            labels, vesselness, optimal_sigma = segment_zyx(
+                organelle_data[t], sigma_range=sigma_range, **frangi_params
+            )
+            frangi_seg_masks.append(labels[0])
+            frangi_vesselness_maps.append(vesselness[0])
+
+            # Extract features from this timepoint
+            features_df = extract_features_zyx(
+                labels_zyx=labels,
+                intensity_zyx=organelle_data[t],
+                frangi_zyx=vesselness,
+                spacing=(pixel_size_um, pixel_size_um),
+                extra_properties=[
+                    "aspect_ratio",
+                    "circularity",
+                    "frangi_intensity",
+                    # "texture",
+                    # "moments_hu",
+                ],
+            )
+
+            if not features_df.empty:
+                features_df["well_id"] = well_id
+                features_df["position_id"] = pos_id
+                features_df["timepoint"] = t
+                all_features.append(features_df)
+
+        frangi_seg_masks = np.array(frangi_seg_masks)
+        frangi_vesselness_maps = np.array(frangi_vesselness_maps)
+
+        # Save combined features
+        if all_features:
+            combined_features = pd.concat(all_features, ignore_index=True)
+            output_csv = output_root / f"features_{well_name}_{well_no}_{pos_id}.csv"
+            combined_features.to_csv(output_csv, index=False)
+            print(f"  Saved {len(combined_features)} object features to {output_csv}")
+
+        # Convert to output format (T_actual, C=1, Z, Y, X)
+        T_actual = frangi_seg_masks.shape[0]
+        out_data = frangi_seg_masks[:, :, :].astype(np.uint32)
+        print(f"  Processed {T_actual} timepoints, output shape: {out_data.shape}")
+
+        position_key = (well_name, well_no, pos_id)
+
+# %%
+
+viewer.add_image(organelle_data[selected_timepoints, 0])
+viewer.add_labels(frangi_seg_masks)
+
+
+# %%
+# Plot mitochondrial dynamics: elongation and fragmentation
+
+if all_features:
+    df = combined_features
+
+    # Aggregate features per timepoint
+    timepoint_summary = (
+        df.groupby("timepoint")
+        .agg(
+            {
+                "label": "count",  # Number of mitochondrial objects
+                "area": ["mean", "median", "sum"],  # Size metrics
+                "aspect_ratio": ["mean", "median"],  # Elongation metric
+                "circularity": ["mean", "median"],  # Shape metric
+                "frangi_mean_intensity": ["mean", "median"],  # Tubularity metric
+                # "moments_hu_1": ["mean", "median"],  # Shape descriptor
+                # "moments_hu_2": ["mean", "median"],  # Shape descriptor
+                # "moments_hu_3": ["mean", "median"],  # Shape descriptor
+                # "moments_hu_4": ["mean", "median"],  # Shape descriptor
+                # "contrast": ["mean", "median"],  # Texture metric
+            }
+        )
+        .reset_index()
+    )
+
+    # Flatten column names
+    timepoint_summary.columns = [
+        "_".join(col).strip("_") for col in timepoint_summary.columns.values
+    ]
+
+    # Create figure with subplots
+    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
+    fig.suptitle(
+        f"Mitochondrial Dynamics: {well_id}/{pos_id}", fontsize=14, fontweight="bold"
+    )
+
+    # Plot 1: Number of objects (fragmentation indicator)
+    ax = axes[0, 0]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["label_count"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#1f77b4",
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Number of Objects", fontsize=11)
+    ax.set_title("Fragmentation (Object Count)", fontsize=12, fontweight="bold")
+    ax.grid(True, alpha=0.3)
+
+    # Plot 2: Mean area per object
+    ax = axes[0, 1]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["area_mean"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#ff7f0e",
+        label="Mean",
+    )
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["area_median"],
+        marker="s",
+        linewidth=2,
+        markersize=7,
+        color="#d62728",
+        label="Median",
+        alpha=0.7,
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Area (µm²)", fontsize=11)
+    ax.set_title("Mitochondrial Size", fontsize=12, fontweight="bold")
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    # Plot 3: Total area (network coverage)
+    ax = axes[0, 2]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["area_sum"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#2ca02c",
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Total Area (µm²)", fontsize=11)
+    ax.set_title("Total Mitochondrial Coverage", fontsize=12, fontweight="bold")
+    ax.grid(True, alpha=0.3)
+
+    # Plot 4: Aspect ratio (elongation)
+    ax = axes[1, 0]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["aspect_ratio_mean"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#9467bd",
+        label="Mean",
+    )
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["aspect_ratio_median"],
+        marker="s",
+        linewidth=2,
+        markersize=7,
+        color="#8c564b",
+        label="Median",
+        alpha=0.7,
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Aspect Ratio", fontsize=11)
+    ax.set_title("Elongation (Aspect Ratio)", fontsize=12, fontweight="bold")
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    # Plot 5: Circularity
+    ax = axes[1, 1]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["circularity_mean"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#e377c2",
+        label="Mean",
+    )
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["circularity_median"],
+        marker="s",
+        linewidth=2,
+        markersize=7,
+        color="#7f7f7f",
+        label="Median",
+        alpha=0.7,
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Circularity", fontsize=11)
+    ax.set_title("Shape Circularity", fontsize=12, fontweight="bold")
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    # Plot 6: Frangi vesselness (tubularity)
+    ax = axes[1, 2]
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["frangi_mean_intensity_mean"],
+        marker="o",
+        linewidth=2,
+        markersize=8,
+        color="#bcbd22",
+        label="Mean",
+    )
+    ax.plot(
+        timepoint_summary["timepoint"],
+        timepoint_summary["frangi_mean_intensity_median"],
+        marker="s",
+        linewidth=2,
+        markersize=7,
+        color="#17becf",
+        label="Median",
+        alpha=0.7,
+    )
+    ax.set_xlabel("Timepoint", fontsize=11)
+    ax.set_ylabel("Frangi Vesselness", fontsize=11)
+    ax.set_title("Tubularity (Frangi)", fontsize=12, fontweight="bold")
+    ax.legend()
+    ax.grid(True, alpha=0.3)
+
+    plt.tight_layout()
+
+    # Save figure
+    output_fig = output_root / f"dynamics_{well_name}_{well_no}_{pos_id}.png"
+    plt.savefig(output_fig, dpi=300, bbox_inches="tight")
+    print(f"  Saved dynamics plot to {output_fig}")
+
+    plt.show()
+
+    # Print summary statistics
+    print(f"\n=== Mitochondrial Dynamics Summary ===")
+    print(f"Position: {well_id}/{pos_id}")
+    print(f"\nTimepoint range: {selected_timepoints[0]} -> {selected_timepoints[-1]}")
+    print(f"\nFragmentation (Object Count):")
+    print(f"  Start: {timepoint_summary['label_count'].iloc[0]:.0f} objects")
+    print(f"  End: {timepoint_summary['label_count'].iloc[-1]:.0f} objects")
+    print(
+        f"  Change: {timepoint_summary['label_count'].iloc[-1] - timepoint_summary['label_count'].iloc[0]:+.0f} ({(timepoint_summary['label_count'].iloc[-1]/timepoint_summary['label_count'].iloc[0] - 1)*100:+.1f}%)"
+    )
+
+    print(f"\nElongation (Aspect Ratio):")
+    print(f"  Start: {timepoint_summary['aspect_ratio_mean'].iloc[0]:.2f}")
+    print(f"  End: {timepoint_summary['aspect_ratio_mean'].iloc[-1]:.2f}")
+    print(
+        f"  Change: {timepoint_summary['aspect_ratio_mean'].iloc[-1] - timepoint_summary['aspect_ratio_mean'].iloc[0]:+.2f} ({(timepoint_summary['aspect_ratio_mean'].iloc[-1]/timepoint_summary['aspect_ratio_mean'].iloc[0] - 1)*100:+.1f}%)"
+    )
+
+    print(f"\nMean Object Size (Area):")
+    print(f"  Start: {timepoint_summary['area_mean'].iloc[0]:.2f} µm²")
+    print(f"  End: {timepoint_summary['area_mean'].iloc[-1]:.2f} µm²")
+    print(
+        f"  Change: {timepoint_summary['area_mean'].iloc[-1] - timepoint_summary['area_mean'].iloc[0]:+.2f} µm² ({(timepoint_summary['area_mean'].iloc[-1]/timepoint_summary['area_mean'].iloc[0] - 1)*100:+.1f}%)"
+    )
+
+# %%
diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py b/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py
new file mode 100644
index 000000000..be255520e
--- /dev/null
+++ b/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py
@@ -0,0 +1,295 @@
+import logging
+
+import numpy as np
+from numpy.typing import ArrayLike
+from skimage import measure, morphology
+from skimage.exposure import equalize_adapthist
+from skimage.filters import frangi, threshold_otsu, threshold_triangle
+
+_logger = logging.getLogger("viscy")
+
+
+def segment_zyx(
+    input_zyx: ArrayLike,
+    clahe_kernel_size=None,
+    clahe_clip_limit=0.01,
+    sigma_range=(0.5, 3.0),
+    sigma_steps=5,
+    auto_optimize_sigma=True,
+    frangi_alpha=0.5,
+    frangi_beta=0.5,
+    frangi_gamma=None,
+    threshold_method="otsu",
+    min_object_size=10,
+    apply_morphology=True,
+):
+    """
+    Segment mitochondria from a 2D or 3D input_zyx using CLAHE preprocessing,
+    Frangi filtering, and connected component labeling.
+
+    Based on:
+    Lefebvre, A.E.Y.T., Sturm, G., Lin, TY. et al.
+    Nellie (2025) https://doi.org/10.1038/s41592-025-02612-7
+
+    Parameters
+    ----------
+    input_zyx : ndarray
+        Input image with shape (Z, Y, X) for 3D.
+        If 2D, uses 2D Frangi filter. If 3D with Z=1, squeezes to 2D.
+    clahe_kernel_size : int or None
+        Kernel size for CLAHE (Contrast Limited Adaptive Histogram Equalization).
+        If None, automatically set to max(input_zyx.shape) // 8.
+    clahe_clip_limit : float
+        Clipping limit for CLAHE, normalized between 0 and 1 (default: 0.01).
+    sigma_range : tuple of float
+        Range of sigma values to test for Frangi filter (min_sigma, max_sigma).
+        Represents the scale of structures to detect.
+    sigma_steps : int
+        Number of sigma values to test in the range.
+    auto_optimize_sigma : bool
+        If True, automatically finds optimal sigma by maximizing vesselness response.
+        If False, uses all sigmas in range for multi-scale filtering.
+    frangi_alpha : float
+        Frangi filter sensitivity to plate-j    like structures (2D) or blob-like (3D).
+    frangi_beta : float
+        Frangi filter sensitivity to blob-like structures (2D) or tube-like (3D).
+    frangi_gamma : float or None
+        Frangi filter sensitivity to background noise. If None, auto-computed.
+    threshold_method : str
+        Thresholding method: 'otsu', 'triangle', 'percentile', 'manual_X'.
+    min_object_size : int
+        Minimum object size in pixels for connected components.
+    apply_morphology : bool
+        If True, applies morphological closing to connect nearby structures.
+
+    Returns
+    -------
+    labels : ndarray
+        Instance segmentation labels with same dimensionality as input.
+    vesselness : ndarray
+        Filtered vesselness response with same dimensionality as input.
+    optimal_sigma : float or None
+        The optimal sigma value if auto_optimize_sigma=True, else None.
+    """
+
+    assert input_zyx.ndim == 3
+    Z, Y, X = input_zyx.shape[-3:]
+
+    if clahe_kernel_size is None:
+        clahe_kernel_size = max(Z, Y, X) // 8
+
+    # Apply CLAHE for contrast enhancement
+    enhanced_zyx = equalize_adapthist(
+        input_zyx,
+        kernel_size=clahe_kernel_size,
+        clip_limit=clahe_clip_limit,
+    )
+
+    # Generate sigma values
+    sigmas = np.linspace(sigma_range[0], sigma_range[1], sigma_steps)
+
+    # Auto-optimize sigma or use multi-scale
+    if auto_optimize_sigma:
+        optimal_sigma, vesselness = _find_optimal_sigma(
+            enhanced_zyx, sigmas, frangi_alpha, frangi_beta, frangi_gamma
+        )
+    else:
+        optimal_sigma = None
+        vesselness = _multiscale_frangi(
+            enhanced_zyx, sigmas, frangi_alpha, frangi_beta, frangi_gamma
+        )
+
+    # Threshold the vesselness response
+    if threshold_method == "otsu":
+        threshold = threshold_otsu(vesselness)
+        _logger.debug(f"Otsu threshold: {threshold:.4f}")
+    elif threshold_method == "triangle":
+        threshold = threshold_triangle(vesselness)
+        _logger.debug(f"Triangle threshold: {threshold:.4f}")
+    elif threshold_method == "nellie_min":
+        threshold_otsu_val = threshold_otsu(vesselness)
+        threshold_triangle_val = threshold_triangle(vesselness)
+        threshold = min(threshold_otsu_val, threshold_triangle_val)
+        _logger.debug(
+            f"Nellie-min threshold: otsu={threshold_otsu_val:.4f}, triangle={threshold_triangle_val:.4f}, using min={threshold:.4f}"
+        )
+    elif threshold_method == "nellie_max":
+        threshold_otsu_val = threshold_otsu(vesselness)
+        threshold_triangle_val = threshold_triangle(vesselness)
+        threshold = max(threshold_otsu_val, threshold_triangle_val)
+        _logger.debug(
+            f"Nellie-max threshold: otsu={threshold_otsu_val:.4f}, triangle={threshold_triangle_val:.4f}, using max={threshold:.4f}"
+        )
+    elif threshold_method == "percentile":
+        # Use percentile-based threshold (good for sparse features)
+        threshold = np.percentile(vesselness[vesselness > 0], 95)  # Keep top 5%
+        _logger.debug(f"Percentile (95th) threshold: {threshold:.4f}")
+    elif threshold_method.startswith("manual_"):
+        # Manual threshold: "manual_0.05" means threshold at 0.05
+        threshold = float(threshold_method.split("_")[1])
+        _logger.debug(f"Manual threshold: {threshold:.4f}")
+    else:
+        raise ValueError(f"Unknown threshold method: {threshold_method}")
+
+    binary_mask = vesselness > threshold
+
+    _logger.debug(
+        f"    Selected {binary_mask.sum()} / {binary_mask.size} pixels ({100*binary_mask.sum()/binary_mask.size:.2f}%)"
+    )
+
+    # Apply morphological operations
+    if apply_morphology:
+        binary_mask = morphology.binary_closing(
+            binary_mask, footprint=morphology.ball(1)
+        )
+        binary_mask = morphology.remove_small_holes(binary_mask, area_threshold=64)
+
+    # Label connected components
+    labels = measure.label(binary_mask, connectivity=2)
+
+    # Remove small objects
+    labels = morphology.remove_small_objects(labels, min_size=min_object_size)
+
+    if Z == 1:
+        labels = labels[np.newaxis, ...]
+        vesselness = vesselness[np.newaxis, ...]
+
+    return labels, vesselness, optimal_sigma
+
+
+def _find_optimal_sigma(input_zyx, sigmas, alpha, beta, gamma):
+    """
+    Find the optimal sigma that maximizes the vesselness response.
+
+    Parameters
+    ----------
+    input_zyx : ndarray
+         3D input_zyx (Z, Y, X).
+    sigmas : array-like
+        Sigma values to test.
+    alpha, beta, gamma : float
+        Frangi filter parameters.
+
+    Returns
+    -------
+    optimal_sigma : float
+        The sigma with the highest mean vesselness response.
+    vesselness : ndarray
+        The vesselness response using optimal sigma.
+    """
+    best_sigma = sigmas[0]
+    best_score = -np.inf
+    best_vesselness = None
+
+    if input_zyx.shape[0] == 1:
+        input_zyx = input_zyx[0]
+
+    for sigma in sigmas:
+        vessel = frangi(
+            input_zyx,
+            sigmas=[sigma],
+            alpha=alpha,
+            beta=beta,
+            gamma=gamma,
+            black_ridges=False,
+        )
+
+        # Score is the mean of top 10% vesselness values
+        score = np.mean(
+            np.partition(vessel.ravel(), -int(0.1 * vessel.size))[
+                -int(0.1 * vessel.size) :
+            ]
+        )
+
+        if score > best_score:
+            best_score = score
+            best_sigma = sigma
+            best_vesselness = vessel
+
+    if input_zyx.shape[0] == 1:
+        best_vesselness = best_vesselness[np.newaxis, ...]
+
+    return best_sigma, best_vesselness
+
+
+def _multiscale_frangi(
+    input_zyx, sigmas: ArrayLike, alpha: float, beta: float, gamma: float
+):
+    """
+    Apply Frangi filter at multiple scales and return the maximum response.
+
+    Parameters
+    ----------
+    input_zyx : ndarray
+        3D input_zyx (Z, Y, X).
+    sigmas : array-like
+        Sigma values for multi-scale filtering.
+    alpha, beta, gamma : float
+        Frangi filter parameters.
+
+    Returns
+    -------
+    vesselness : ndarray
+        Maximum vesselness response across all scales.
+    """
+    if input_zyx.shape[0] == 1:
+        input_zyx = input_zyx[0]
+    vesselness = frangi(
+        input_zyx,
+        sigmas=sigmas,
+        alpha=alpha,
+        beta=beta,
+        gamma=gamma,
+        black_ridges=False,
+    )
+    if input_zyx.shape[0] == 1:
+        vesselness = vesselness[np.newaxis, ...]
+    return vesselness
+
+
+def calculate_nellie_sigmas(
+    min_radius_um, max_radius_um, pixel_size_um, num_sigma=5, min_step_size_px=0.2
+):
+    """
+    Calculate sigma values following Nellie's approach.
+
+    Parameters
+    ----------
+    min_radius_um : float
+        Minimum structure radius in micrometers (e.g., 0.2 for diffraction limit)
+    max_radius_um : float
+        Maximum structure radius in micrometers (e.g., 1.0 for thick tubules)
+    pixel_size_um : float
+        Pixel size in micrometers
+    num_sigma : int
+        Target number of sigma values
+    min_step_size_px : float
+        Minimum step size between sigmas in pixels
+
+    Returns
+    -------
+    tuple : (sigma_min, sigma_max)
+        Sigma range in pixels
+    """
+    min_radius_px = min_radius_um / pixel_size_um
+    max_radius_px = max_radius_um / pixel_size_um
+
+    # Nellie uses radius/2 to radius/3 as sigma
+    sigma_1 = min_radius_px / 2
+    sigma_2 = max_radius_px / 3
+    sigma_min = min(sigma_1, sigma_2)
+    sigma_max = max(sigma_1, sigma_2)
+
+    # Calculate step size with minimum constraint
+    sigma_step_calculated = (sigma_max - sigma_min) / num_sigma
+    sigma_step = max(min_step_size_px, sigma_step_calculated)
+
+    sigmas = list(np.arange(sigma_min, sigma_max + sigma_step, sigma_step))
+
+    _logger.debug(f"  Nellie-style sigmas: {sigma_min:.3f} to {sigma_max:.3f} pixels")
+    _logger.debug(
+        f"  Radius range: {min_radius_um:.3f}-{max_radius_um:.3f} µm = {min_radius_px:.2f}-{max_radius_px:.2f} pixels"
+    )
+    _logger.debug(f"  Sigma values: {[f'{s:.2f}' for s in sigmas]}")
+
+    return (sigma_min, sigma_max)

From 0d5a46ab6ebbbe6fea515c07bd05e7c3eb6b7f17 Mon Sep 17 00:00:00 2001
From: Eduardo Hirata-Miyasaki <edhiratam@gmail.com>
Date: Mon, 13 Oct 2025 19:34:16 -0700
Subject: [PATCH 2/4] update folder output for demo file

---
 .../pseudotime_analysis/organelle_segmentation/segment_mito.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
index bffe9d1c0..4f68ca9f7 100644
--- a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
+++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py
@@ -34,7 +34,7 @@
 
 output_root = (
     Path(
-        "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/infection_state/output"
+        "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output"
     )
     / input_path.stem
 )

From 6d2e5c9d1c6032d5e3815d55116284e4e49735c0 Mon Sep 17 00:00:00 2001
From: Eduardo Hirata-Miyasaki <edhiratam@gmail.com>
Date: Mon, 13 Oct 2025 20:32:24 -0700
Subject: [PATCH 3/4] code to generate the pandas dataframe per position

---
 .../organelle_segmentation/segment_mito_sc.py | 164 ++++++++++++++++++
 1 file changed, 164 insertions(+)
 create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py

diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py
new file mode 100644
index 000000000..202251ac0
--- /dev/null
+++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py
@@ -0,0 +1,164 @@
+# %%
+import os
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from extract_features import (
+    extract_features_zyx,
+)
+from iohub import open_ome_zarr
+from matplotlib import pyplot as plt
+from segment_organelles import (
+    calculate_nellie_sigmas,
+    segment_zyx,
+)
+from skimage.exposure import rescale_intensity
+from tqdm import tqdm
+
+# %%
+
+input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr"
+input_zarr = open_ome_zarr(input_path, mode="r")
+in_chans = input_zarr.channel_names
+organelle_channel = "GFP EX488 EM525-45"
+
+org_seg_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/1-predictions/quantify_remodeling/segmentation_TOMM20_filaments.zarr"
+seg_zarr = open_ome_zarr(org_seg_path, mode="r")
+seg_chans = seg_zarr.channel_names
+organelle_labels = "Organelle_mask"
+
+nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/1-preprocess/label-free/3-track/2024_11_21_A549_TOMM20_DENV_cropped.zarr"
+nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r")
+nuclear_labels_chans = nuclear_labels_zarr.channel_names
+nuclear_labels = "nuclei_prediction_labels_labels"
+
+output_root = Path(
+    "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output"
+)
+output_csv = output_root / "2024_11_21_A549_TOMM20_DENV/organelle_seg_features.csv"
+
+
+# %% library of feature extraction methods
+def get_patch(data, cell_centroid, PATCH_SIZE):
+
+    x_centroid, y_centroid = cell_centroid
+    # ensure patch boundaries stay within data dimensions
+    x_start = max(0, x_centroid - PATCH_SIZE // 2)
+    x_end = min(data.shape[1], x_centroid + PATCH_SIZE // 2)
+    y_start = max(0, y_centroid - PATCH_SIZE // 2)
+    y_end = min(data.shape[0], y_centroid + PATCH_SIZE // 2)
+
+    # get patch of PATCH_SIZE centered on centroid
+    patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)]
+    return patch
+
+
+# %%
+PATCH_SIZE = 160
+frangi_params = {
+    "clahe_clip_limit": 0.01,  # Mild contrast enhancement (0.01-0.03 range)
+    "sigma_steps": 2,  # Multiple scales to capture size variation
+    "auto_optimize_sigma": False,  # Use multi-scale (max across scales)
+    "frangi_alpha": 0.5,  # Standard tubular structure sensitivity
+    "frangi_beta": 0.5,  # Standard blob rejection
+    "threshold_method": "nellie_max",  # CRITICAL: Manual threshold where auto methods fail
+    "min_object_size": 10,  # Remove small noise clusters (20-50 pixels)
+    "apply_morphology": False,  # Connect fragmented mitochondria
+}
+
+
+for well_id, well_data in tqdm(seg_zarr.wells(), desc="Wells", position=0):
+    # well_id, well_data = next(seg_zarr.wells())
+    well_name, well_no = well_id.split("/")
+    # if well_id in wells_to_process:
+    for pos_id, pos_data in tqdm(
+        well_data.positions(), desc=f"Positions in {well_id}", position=1, leave=False
+    ):
+        # pos_id, pos_data = next(well_data.positions())
+        T, C, Z, Y, X = pos_data.data.shape
+        seg_data = pos_data.data.numpy()
+        org_seg_mask = seg_data[:, seg_chans.index(organelle_labels)]
+
+        # read the csv stored in each nucl seg zarr folder
+        file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv"
+        nuclear_labels_csv = os.path.join(
+            nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name
+        )
+        nuclear_labels_df = pd.read_csv(nuclear_labels_csv)
+
+        in_data = input_zarr[well_id + "/" + pos_id].data.numpy()
+        scale_um = input_zarr[well_id + "/" + pos_id].scale
+        organelle_data = in_data[:, in_chans.index(organelle_channel)]
+
+        # Initialize an empty list to store values from each row of the csv
+        position_features = []
+        for idx, row in nuclear_labels_df.iterrows():
+
+            if (
+                row["x"] > PATCH_SIZE // 2
+                and row["y"] > PATCH_SIZE // 2
+                and row["x"] < X - PATCH_SIZE // 2
+                and row["y"] < Y - PATCH_SIZE // 2
+            ):
+                cell_centroid = row["x"], row["y"]
+
+                timepoint = row["t"]
+                organelle_patch = get_patch(
+                    organelle_data[int(timepoint), 0], cell_centroid, PATCH_SIZE
+                )
+
+                #
+                organelle_patch = organelle_patch[np.newaxis]
+                organelle_patch = rescale_intensity(
+                    organelle_patch,
+                    out_range=(0, 1),
+                )
+                # FIXME set some defualt for the mitochondria and other organelles
+                min_radius_um = 0.15  # 300 nm diameter = ~2 pixels
+                max_radius_um = 0.6  # 1 µm diameter = ~6.7 pixels
+                sigma_range = calculate_nellie_sigmas(
+                    min_radius_um,
+                    max_radius_um,
+                    scale_um[-1],  # use the highest res axis (XY)
+                    num_sigma=frangi_params["sigma_steps"],
+                )
+
+                labels, vesselness, optimal_sigma = segment_zyx(
+                    organelle_patch, sigma_range=sigma_range, **frangi_params
+                )
+                features_df = extract_features_zyx(
+                    labels_zyx=labels,
+                    intensity_zyx=organelle_patch,
+                    frangi_zyx=vesselness,
+                    spacing=(scale_um[-1], scale_um[-1]),
+                    extra_properties=[
+                        "aspect_ratio",
+                        "circularity",
+                        "frangi_intensity",
+                        "texture",
+                        "moments_hu",
+                    ],
+                )
+
+                if not features_df.empty:
+                    features_df["fov_name"] = "/" + well_id + "/" + pos_id
+                    features_df["track_id"] = row["track_id"]
+                    features_df["t"] = timepoint
+                    features_df["x"] = row["x"]
+                    features_df["y"] = row["y"]
+
+                position_features.append(features_df)
+
+        if position_features:
+            # Concatenate the list of DataFrames
+            position_df = pd.concat(position_features, ignore_index=True)
+            position_df.to_csv(
+                output_csv,
+                mode="a",
+                header=not os.path.exists(output_csv),
+                index=False,
+            )
+
+
+# %%

From c2160c6cccc3e8976e0b4c4c77d8b40a3f9eb19a Mon Sep 17 00:00:00 2001
From: Eduardo Hirata-Miyasaki <edhiratam@gmail.com>
Date: Tue, 14 Oct 2025 11:02:24 -0700
Subject: [PATCH 4/4] parallelized segment mito features

---
 .../organelle_segmentation/segment_mito_sc.py | 293 ++++++++++--------
 1 file changed, 168 insertions(+), 125 deletions(-)

diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py
index 202251ac0..df60ebde8 100644
--- a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py
+++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py
@@ -8,6 +8,7 @@
     extract_features_zyx,
 )
 from iohub import open_ome_zarr
+from joblib import Parallel, delayed
 from matplotlib import pyplot as plt
 from segment_organelles import (
     calculate_nellie_sigmas,
@@ -16,28 +17,6 @@
 from skimage.exposure import rescale_intensity
 from tqdm import tqdm
 
-# %%
-
-input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr"
-input_zarr = open_ome_zarr(input_path, mode="r")
-in_chans = input_zarr.channel_names
-organelle_channel = "GFP EX488 EM525-45"
-
-org_seg_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/1-predictions/quantify_remodeling/segmentation_TOMM20_filaments.zarr"
-seg_zarr = open_ome_zarr(org_seg_path, mode="r")
-seg_chans = seg_zarr.channel_names
-organelle_labels = "Organelle_mask"
-
-nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/1-preprocess/label-free/3-track/2024_11_21_A549_TOMM20_DENV_cropped.zarr"
-nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r")
-nuclear_labels_chans = nuclear_labels_zarr.channel_names
-nuclear_labels = "nuclei_prediction_labels_labels"
-
-output_root = Path(
-    "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output"
-)
-output_csv = output_root / "2024_11_21_A549_TOMM20_DENV/organelle_seg_features.csv"
-
 
 # %% library of feature extraction methods
 def get_patch(data, cell_centroid, PATCH_SIZE):
@@ -54,111 +33,175 @@ def get_patch(data, cell_centroid, PATCH_SIZE):
     return patch
 
 
-# %%
-PATCH_SIZE = 160
-frangi_params = {
-    "clahe_clip_limit": 0.01,  # Mild contrast enhancement (0.01-0.03 range)
-    "sigma_steps": 2,  # Multiple scales to capture size variation
-    "auto_optimize_sigma": False,  # Use multi-scale (max across scales)
-    "frangi_alpha": 0.5,  # Standard tubular structure sensitivity
-    "frangi_beta": 0.5,  # Standard blob rejection
-    "threshold_method": "nellie_max",  # CRITICAL: Manual threshold where auto methods fail
-    "min_object_size": 10,  # Remove small noise clusters (20-50 pixels)
-    "apply_morphology": False,  # Connect fragmented mitochondria
-}
-
-
-for well_id, well_data in tqdm(seg_zarr.wells(), desc="Wells", position=0):
-    # well_id, well_data = next(seg_zarr.wells())
+def process_position(
+    well_id,
+    pos_id,
+    input_path,
+    nuclear_labels_path,
+    organelle_channel,
+    patch_size,
+    frangi_params,
+):
+    """Process a single position and return the features DataFrame."""
+    # Open zarr stores (each worker needs its own file handles)
+    input_zarr = open_ome_zarr(input_path, mode="r")
+
     well_name, well_no = well_id.split("/")
-    # if well_id in wells_to_process:
-    for pos_id, pos_data in tqdm(
-        well_data.positions(), desc=f"Positions in {well_id}", position=1, leave=False
-    ):
-        # pos_id, pos_data = next(well_data.positions())
-        T, C, Z, Y, X = pos_data.data.shape
-        seg_data = pos_data.data.numpy()
-        org_seg_mask = seg_data[:, seg_chans.index(organelle_labels)]
-
-        # read the csv stored in each nucl seg zarr folder
-        file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv"
-        nuclear_labels_csv = os.path.join(
-            nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name
-        )
-        nuclear_labels_df = pd.read_csv(nuclear_labels_csv)
-
-        in_data = input_zarr[well_id + "/" + pos_id].data.numpy()
-        scale_um = input_zarr[well_id + "/" + pos_id].scale
-        organelle_data = in_data[:, in_chans.index(organelle_channel)]
-
-        # Initialize an empty list to store values from each row of the csv
-        position_features = []
-        for idx, row in nuclear_labels_df.iterrows():
-
-            if (
-                row["x"] > PATCH_SIZE // 2
-                and row["y"] > PATCH_SIZE // 2
-                and row["x"] < X - PATCH_SIZE // 2
-                and row["y"] < Y - PATCH_SIZE // 2
-            ):
-                cell_centroid = row["x"], row["y"]
-
-                timepoint = row["t"]
-                organelle_patch = get_patch(
-                    organelle_data[int(timepoint), 0], cell_centroid, PATCH_SIZE
-                )
-
-                #
-                organelle_patch = organelle_patch[np.newaxis]
-                organelle_patch = rescale_intensity(
-                    organelle_patch,
-                    out_range=(0, 1),
-                )
-                # FIXME set some defualt for the mitochondria and other organelles
-                min_radius_um = 0.15  # 300 nm diameter = ~2 pixels
-                max_radius_um = 0.6  # 1 µm diameter = ~6.7 pixels
-                sigma_range = calculate_nellie_sigmas(
-                    min_radius_um,
-                    max_radius_um,
-                    scale_um[-1],  # use the highest res axis (XY)
-                    num_sigma=frangi_params["sigma_steps"],
-                )
-
-                labels, vesselness, optimal_sigma = segment_zyx(
-                    organelle_patch, sigma_range=sigma_range, **frangi_params
-                )
-                features_df = extract_features_zyx(
-                    labels_zyx=labels,
-                    intensity_zyx=organelle_patch,
-                    frangi_zyx=vesselness,
-                    spacing=(scale_um[-1], scale_um[-1]),
-                    extra_properties=[
-                        "aspect_ratio",
-                        "circularity",
-                        "frangi_intensity",
-                        "texture",
-                        "moments_hu",
-                    ],
-                )
-
-                if not features_df.empty:
-                    features_df["fov_name"] = "/" + well_id + "/" + pos_id
-                    features_df["track_id"] = row["track_id"]
-                    features_df["t"] = timepoint
-                    features_df["x"] = row["x"]
-                    features_df["y"] = row["y"]
-
-                position_features.append(features_df)
-
-        if position_features:
-            # Concatenate the list of DataFrames
-            position_df = pd.concat(position_features, ignore_index=True)
-            position_df.to_csv(
-                output_csv,
-                mode="a",
-                header=not os.path.exists(output_csv),
-                index=False,
+
+    # Load position data
+    position = input_zarr[well_id + "/" + pos_id]
+    T, C, Z, Y, X = position.data.shape
+    channel_names = position.channel_names
+    scale_um = position.scale
+
+    in_data = position.data.numpy()
+
+    # Read the csv stored in each nucl seg zarr folder
+    file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv"
+    nuclear_labels_csv = os.path.join(
+        nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name
+    )
+    nuclear_labels_df = pd.read_csv(nuclear_labels_csv)
+    organelle_data = in_data[:, channel_names.index(organelle_channel)]
+
+    # Initialize an empty list to store values from each row of the csv
+    position_features = []
+    for idx, row in nuclear_labels_df.iterrows():
+
+        if (
+            row["x"] > patch_size // 2
+            and row["y"] > patch_size // 2
+            and row["x"] < X - patch_size // 2
+            and row["y"] < Y - patch_size // 2
+        ):
+            cell_centroid = row["x"], row["y"]
+
+            timepoint = row["t"]
+            organelle_patch = get_patch(
+                organelle_data[int(timepoint), 0], cell_centroid, patch_size
             )
 
+            organelle_patch = organelle_patch[np.newaxis]
+            organelle_patch = rescale_intensity(
+                organelle_patch,
+                out_range=(0, 1),
+            )
+            min_radius_um = 0.15  # 300 nm diameter = ~2 pixels
+            max_radius_um = 0.6  # 1 µm diameter = ~6.7 pixels
+            sigma_range = calculate_nellie_sigmas(
+                min_radius_um,
+                max_radius_um,
+                scale_um[-1],  # use the highest res axis (XY)
+                num_sigma=frangi_params["sigma_steps"],
+            )
+
+            labels, vesselness, optimal_sigma = segment_zyx(
+                organelle_patch, sigma_range=sigma_range, **frangi_params
+            )
+            features_df = extract_features_zyx(
+                labels_zyx=labels,
+                intensity_zyx=organelle_patch,
+                frangi_zyx=vesselness,
+                spacing=(scale_um[-1], scale_um[-1]),
+                extra_properties=[
+                    "aspect_ratio",
+                    "circularity",
+                    "frangi_intensity",
+                    "texture",
+                    "moments_hu",
+                ],
+            )
+
+            if not features_df.empty:
+                features_df["fov_name"] = well_id + "/" + pos_id
+                features_df["track_id"] = row["track_id"]
+                features_df["t"] = timepoint
+                features_df["x"] = row["x"]
+                features_df["y"] = row["y"]
+
+            position_features.append(features_df)
+
+    if position_features:
+        # Concatenate the list of DataFrames
+        position_df = pd.concat(position_features, ignore_index=True)
+        return position_df
+    else:
+        return pd.DataFrame()
+
+    input_zarr.close()
+    # nuclear_labels_zarr.close()
+
 
 # %%
+if __name__ == "__main__":
+    input_path = Path(
+        "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/train-test/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV.zarr"
+    )
+    input_zarr = open_ome_zarr(input_path, mode="r")
+    in_chans = input_zarr.channel_names
+    organelle_channel = "GFP EX488 EM525-45"
+
+    nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/1-preprocess/label-free/3-track/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV_cropped.zarr"
+    # nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r")
+    # nuclear_labels_chans = nuclear_labels_zarr.channel_names
+    # nuclear_labels = "nuclei_prediction_labels_labels"
+
+    output_root = (
+        Path(
+            "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output"
+        )
+        / input_path.stem
+    )
+    output_root.mkdir(parents=True, exist_ok=True)
+
+    output_csv = output_root / f"{input_path.stem}_mito_features.csv"
+
+    PATCH_SIZE = 160
+    frangi_params = {
+        "clahe_clip_limit": 0.01,  # Mild contrast enhancement (0.01-0.03 range)
+        "sigma_steps": 2,  # Multiple scales to capture size variation
+        "auto_optimize_sigma": False,  # Use multi-scale (max across scales)
+        "frangi_alpha": 0.5,  # Standard tubular structure sensitivity
+        "frangi_beta": 0.5,  # Standard blob rejection
+        "threshold_method": "nellie_max",  # CRITICAL: Manual threshold where auto methods fail
+        "min_object_size": 10,  # Remove small noise clusters (20-50 pixels)
+        "apply_morphology": False,  # Connect fragmented mitochondria
+    }
+
+    # Number of parallel jobs - adjust based on your system
+    # -1 means use all available cores, or set to a specific number
+    n_jobs = -1
+
+    # Collect all (well_id, pos_id) pairs to process
+    with open_ome_zarr(input_path, mode="r") as input_zarr:
+        positions_to_process = []
+        for well_id, well_data in input_zarr.wells():
+            for pos_id, pos_data in well_data.positions():
+                positions_to_process.append((well_id, pos_id))
+
+    print(
+        f"Processing {len(positions_to_process)} positions using {n_jobs} parallel jobs..."
+    )
+
+    # Process positions in parallel
+    results = Parallel(n_jobs=n_jobs, verbose=10)(
+        delayed(process_position)(
+            well_id=well_id,
+            pos_id=pos_id,
+            input_path=input_path,
+            nuclear_labels_path=nuclear_labels_path,
+            organelle_channel=organelle_channel,
+            patch_size=PATCH_SIZE,
+            frangi_params=frangi_params,
+        )
+        for well_id, pos_id in positions_to_process
+    )
+
+    # Combine all results and save
+    all_features = [df for df in results if not df.empty]
+    if all_features:
+        final_df = pd.concat(all_features, ignore_index=True)
+        # Save all at once instead of appending
+        output_csv.parent.mkdir(parents=True, exist_ok=True)
+        final_df.to_csv(output_csv, index=False)
+        print(f"Saved {len(final_df)} features to {output_csv}")