From e4397eaf1438951deb736d513fb9f92503b4f0b6 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Mon, 13 Oct 2025 19:31:33 -0700 Subject: [PATCH 1/4] proof of concept for segmentation and extraction of features --- .../extract_features.py | 296 ++++++++++++++ .../organelle_segmentation/segment_mito.py | 381 ++++++++++++++++++ .../segment_organelles.py | 295 ++++++++++++++ 3 files changed, 972 insertions(+) create mode 100644 applications/pseudotime_analysis/organelle_segmentation/extract_features.py create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_mito.py create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py diff --git a/applications/pseudotime_analysis/organelle_segmentation/extract_features.py b/applications/pseudotime_analysis/organelle_segmentation/extract_features.py new file mode 100644 index 000000000..739bc1f2f --- /dev/null +++ b/applications/pseudotime_analysis/organelle_segmentation/extract_features.py @@ -0,0 +1,296 @@ +import warnings as warning + +import numpy as np +import pandas as pd +from numpy.typing import ArrayLike +from skimage import measure +from skimage.feature import graycomatrix, graycoprops + + +def extract_features_zyx( + labels_zyx: ArrayLike, + intensity_zyx: ArrayLike = None, + frangi_zyx: ArrayLike = None, + spacing: tuple = (1.0, 1.0), + properties: list = None, + extra_properties: list = None, +): + """ + Extract morphological and intensity features from labeled organelles. + + Handles both 2D (Z=1) and 3D (Z>1) data automatically + For 2D data, processes the single Z-slice. For 3D data, performs max projection + along Z axis before feature extraction. + + Based on: + Lefebvre, A.E.Y.T., Sturm, G., Lin, TY. et al. + Nellie (2025) https://doi.org/10.1038/s41592-025-02612-7 + + Parameters + ---------- + labels_zyx : ndarray + Labeled segmentation mask with shape (Z, Y, X). + Each unique integer value represents a different organelle instance. + intensity_zyx : ndarray, optional + Original intensity image with shape (Z, Y, X) for computing + intensity-based features. If None, only morphological features computed. + frangi_image : ndarray, optional + Frangi vesselness response with shape (Z, Y, X) for computing + tubularity/filament features. + spacing : tuple + Physical spacing in same units (e.g., µm). + For 2D (Z=1): (pixel_size_y, pixel_size_x) + For 3D (Z>1): (pixel_size_z, pixel_size_y, pixel_size_x) + properties : list of str, optional + List of standard regionprops features to compute. If None, uses default set. + Available: 'label', 'area', 'perimeter', 'axis_major_length', + 'axis_minor_length', 'solidity', 'extent', 'orientation', + 'equivalent_diameter_area', 'convex_area', 'eccentricity', + 'mean_intensity', 'min_intensity', 'max_intensity' + extra_properties : list of str, optional + Additional features beyond standard regionprops. Options: + - 'moments_hu': Hu moments (shape descriptors, 7 features) + - 'texture': Haralick texture features (4 features: contrast, homogeneity, energy, correlation) + - 'aspect_ratio': Major axis / minor axis ratio + - 'circularity': area / perimeter + - 'frangi_intensity': Mean/min/max/sum/std of Frangi vesselness + - 'feret_diameter_max': Maximum Feret diameter (expensive) + - 'sum_intensity': Sum of intensity values + - 'std_intensity': Standard deviation of intensity values + + Returns + ------- + features_df : pd.DataFrame + DataFrame where each row represents one labeled object with columns + for each computed feature. Always includes 'label' and 'channel' columns. + + Examples + -------- + >>> # Basic morphology only + >>> df = extract_features_zyx(labels_zyx) + + >>> # With intensity features + >>> df = extract_features_zyx(labels_zyx, intensity_zyx=intensity) + + >>> # Custom property selection + >>> df = extract_features_zyx( + ... labels_zyx, + ... intensity_zyx=intensity, + ... properties=['label', 'area', 'mean_intensity'], + ... extra_properties=['aspect_ratio', 'circularity'] + ... ) + + >>> # Full feature set including Frangi + >>> df = extract_features_zyx( + ... labels_zyx, + ... intensity_zyx=intensity, + ... frangi_image=vesselness, + ... extra_properties=['moments_hu', 'texture', 'frangi_intensity'] + ... ) + """ + + if intensity_zyx is not None: + assert ( + intensity_zyx.shape == labels_zyx.shape + ), "Image and labels must have same shape" + + Z, _, _ = labels_zyx.shape + + # Default properties if not specified + if properties is None: + properties = [ + "label", + "area", + "perimeter", + "axis_major_length", + "axis_minor_length", + "solidity", + "extent", + "orientation", + "equivalent_diameter_area", + "convex_area", + "eccentricity", + ] + # Add intensity features if image provided + if intensity_zyx is not None: + properties.extend(["mean_intensity", "min_intensity", "max_intensity"]) + + if extra_properties is None: + extra_properties = [] + + # Determine 2D vs 3D mode and set appropriate spacing + spacing_2d = spacing if len(spacing) == 2 else spacing[-2:] + + if Z == 1: + # Squeeze Z dimension for 2D processing + labels_processed = labels_zyx[0] # Shape: (Y, X) + intensity_processed = intensity_zyx[0] if intensity_zyx is not None else None + frangi_processed = frangi_zyx[0] if frangi_zyx is not None else None + else: + # Use max projection along Z for 3D -> 2D + labels_processed = np.max(labels_zyx, axis=0) # Shape: (Y, X) + intensity_processed = ( + np.max(intensity_zyx, axis=0) if intensity_zyx is not None else None + ) + frangi_processed = ( + np.max(frangi_zyx, axis=0) if frangi_zyx is not None else None + ) + + # Check if we have any objects to process + if labels_processed.max() == 0: + warning.warn(f"Warning: No objects found") + + # Compute base regionprops features (those that support spacing) + props_with_spacing = [p for p in properties if p not in ["moments_hu"]] + + try: + props_dict = measure.regionprops_table( + labels_processed, + intensity_image=intensity_processed, + properties=tuple(props_with_spacing), + spacing=spacing_2d, + ) + df = pd.DataFrame(props_dict) + except Exception as e: + warning.warn(f"Error computing base regionprops: {e}") + + # Add Hu moments separately (without spacing) + if "moments_hu" in properties or "moments_hu" in extra_properties: + try: + hu_props = measure.regionprops_table( + labels_processed, properties=("label", "moments_hu"), spacing=(1, 1) + ) + hu_df = pd.DataFrame(hu_props) + # Rename columns to be clearer + hu_rename = {f"moments_hu-{i}": f"hu_moment_{i}" for i in range(7)} + hu_df = hu_df.rename(columns=hu_rename) + df = df.merge(hu_df, on="label", how="left") + except Exception as e: + warning.warn(f"Could not compute Hu moments: {e}") + + # Add derived metrics + if "aspect_ratio" in extra_properties: + minor_axis = df["axis_minor_length"].replace(0, 1) # Avoid division by zero + df["aspect_ratio"] = df["axis_major_length"] / minor_axis + + if "circularity" in extra_properties: + perimeter_sq = df["perimeter"] ** 2 + df["circularity"] = np.divide( + 4 * np.pi * df["area"], + perimeter_sq, + out=np.ones_like(perimeter_sq), + where=perimeter_sq != 0, + ) + + # Add expensive/iterative features + if any( + prop in extra_properties + for prop in ["texture", "feret_diameter_max", "frangi_intensity"] + ): + regions = measure.regionprops( + labels_processed, intensity_image=intensity_processed + ) + extra_features = [] + + for region in regions: + features = {"label": region.label} + + # Haralick texture features + if "texture" in extra_properties and intensity_processed is not None: + min_r, min_c, max_r, max_c = region.bbox + region_intensity = ( + intensity_processed[min_r:max_r, min_c:max_c] * region.image + ) + + # Normalize to uint8 + min_val, max_val = region_intensity.min(), region_intensity.max() + if max_val > min_val: + region_uint8 = ( + (region_intensity - min_val) / (max_val - min_val) * 255 + ).astype(np.uint8) + else: + region_uint8 = np.zeros_like(region_intensity, dtype=np.uint8) + + try: + glcm = graycomatrix( + region_uint8, + distances=[1], + angles=[0], + levels=256, + symmetric=True, + normed=True, + ) + features["texture_contrast"] = graycoprops(glcm, "contrast")[0, 0] + features["texture_homogeneity"] = graycoprops(glcm, "homogeneity")[ + 0, 0 + ] + features["texture_energy"] = graycoprops(glcm, "energy")[0, 0] + features["texture_correlation"] = graycoprops(glcm, "correlation")[ + 0, 0 + ] + except Exception: + features["texture_contrast"] = np.nan + features["texture_homogeneity"] = np.nan + features["texture_energy"] = np.nan + features["texture_correlation"] = np.nan + + # Feret diameter + if "feret_diameter_max" in extra_properties: + features["feret_diameter_max"] = region.feret_diameter_max + + # Frangi intensity features + if "frangi_intensity" in extra_properties and frangi_processed is not None: + min_r, min_c, max_r, max_c = region.bbox + region_frangi = frangi_processed[min_r:max_r, min_c:max_c][region.image] + + if region_frangi.size > 0: + features["frangi_mean_intensity"] = np.mean(region_frangi) + features["frangi_min_intensity"] = np.min(region_frangi) + features["frangi_max_intensity"] = np.max(region_frangi) + features["frangi_sum_intensity"] = np.sum(region_frangi) + features["frangi_std_intensity"] = np.std(region_frangi) + else: + features["frangi_mean_intensity"] = np.nan + features["frangi_min_intensity"] = np.nan + features["frangi_max_intensity"] = np.nan + features["frangi_sum_intensity"] = np.nan + features["frangi_std_intensity"] = np.nan + + extra_features.append(features) + + if extra_features: + extra_df = pd.DataFrame(extra_features) + df = df.merge(extra_df, on="label", how="left") + + # Add sum and std intensity if we have intensity image + if intensity_processed is not None and ( + "sum_intensity" in extra_properties or "std_intensity" in extra_properties + ): + regions = measure.regionprops( + labels_processed, intensity_image=intensity_processed + ) + sum_std_features = [] + + for region in regions: + min_r, min_c, max_r, max_c = region.bbox + region_pixels = intensity_processed[min_r:max_r, min_c:max_c][region.image] + + features = {"label": region.label} + if region_pixels.size > 0: + if "sum_intensity" in extra_properties: + features["sum_intensity"] = np.sum(region_pixels) + if "std_intensity" in extra_properties: + features["std_intensity"] = np.std(region_pixels) + else: + if "sum_intensity" in extra_properties: + features["sum_intensity"] = np.nan + if "std_intensity" in extra_properties: + features["std_intensity"] = np.nan + + sum_std_features.append(features) + + if sum_std_features: + sum_std_df = pd.DataFrame(sum_std_features) + df = df.merge(sum_std_df, on="label", how="left") + + return df diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py new file mode 100644 index 000000000..bffe9d1c0 --- /dev/null +++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py @@ -0,0 +1,381 @@ +# %% code for organelle and nuclear segmentation and feature extraction + +import os +from pathlib import Path + +import napari +import numpy as np +import pandas as pd +from extract_features import ( + extract_features_zyx, +) +from iohub import open_ome_zarr +from matplotlib import pyplot as plt +from segment_organelles import ( + calculate_nellie_sigmas, + segment_zyx, +) +from skimage.exposure import rescale_intensity +from tqdm import tqdm + +os.environ["DISPLAY"] = ":1" +viewer = napari.Viewer() +# %% + +# input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr" +input_path = Path( + "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/train-test/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV.zarr" +) +# input_path = "/hpc/projects/intracellular_dashboard/organelle_box/2025_04_04_organelle_box_Live_dye/4-concatenate/organelle_box_live_dye_TOMM20.zarr" +input_zarr = open_ome_zarr(input_path, mode="r", layout="hcs") +in_chans = input_zarr.channel_names +organelle_channel_name = "GFP EX488 EM525-45" +# Organelle_chan = "4-MultiCam_GFP_Cy5-BSI_Express" + +output_root = ( + Path( + "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/infection_state/output" + ) + / input_path.stem +) +output_root.mkdir(parents=True, exist_ok=True) + +# %% +# Frangi parameters - WORKING configuration for 2D mitochondria segmentation +frangi_params = { + "clahe_clip_limit": 0.01, # Mild contrast enhancement (0.01-0.03 range) + "sigma_steps": 2, # Multiple scales to capture size variation + "auto_optimize_sigma": False, # Use multi-scale (max across scales) + "frangi_alpha": 0.5, # Standard tubular structure sensitivity + "frangi_beta": 0.5, # Standard blob rejection + "threshold_method": "nellie_max", # CRITICAL: Manual threshold where auto methods fail + "min_object_size": 10, # Remove small noise clusters (20-50 pixels) + "apply_morphology": False, # Connect fragmented mitochondria +} + + +position_names = [] +for ds, position in input_zarr.positions(): + position_names.append(tuple(ds.split("/"))) + + +for well_id, well_data in input_zarr.wells(): + # print(well_id) + # well_id, well_data = next(input_zarr.wells()) + well_name, well_no = well_id.split("/") + + if "B/1" not in well_id: + continue + # if well_id == 'C/4': + # print(well_name, well_no) + for pos_id, pos_data in well_data.positions(): + if pos_id != "000000": + continue + scale = ( + pos_data.metadata.multiscales[0] + .datasets[0] + .coordinate_transformations[0] + .scale + ) + pixel_size_um = scale[-1] # XY pixel size in micrometers + z_spacing_um = scale[-3] # Z spacing in micrometers + print(f" Pixel size: {pixel_size_um:.4f} µm, Z spacing: {z_spacing_um:.4f} µm") + + in_data = pos_data.data.numpy() + if in_data.shape[-3] != 1: + in_data = np.max(in_data, axis=-3, keepdims=True) + T, C, Z, Y, X = in_data.shape + print(f"Input data shape: {in_data.shape} (T={T}, C={C}, Z={Z}, Y={Y}, X={X})") + + # Extract and normalize organelle channel (keep Z dimension) + organelle_data = in_data[ + :, in_chans.index(organelle_channel_name), : + ] # (T, Z, Y, X) + organelle_data = rescale_intensity(organelle_data, out_range=(0, 1)) + print(f"Organelle data shape after extraction: {organelle_data.shape}") + + # Calculate sigma range - ADJUSTED for your pixel size + # With 0.1494 µm/pixel, mitochondria (0.3-1.0 µm diameter) = 2-7 pixels diameter + # Sigma should be ~radius/2, so for diameter 2-7px, sigma = 0.5-1.75 px + min_radius_um = 0.15 # 300 nm diameter = ~2 pixels + max_radius_um = 0.6 # 1 µm diameter = ~6.7 pixels + sigma_range = calculate_nellie_sigmas( + min_radius_um, + max_radius_um, + pixel_size_um, + num_sigma=frangi_params["sigma_steps"], + ) + + print(f"Using sigma range: {sigma_range[0]:.2f} to {sigma_range[1]:.2f} pixels") + + # Frangi filtering and segmentation + print( + f"Computing Frangi segmentation and feature extraction for {well_id}/{pos_id}..." + ) + frangi_seg_masks = [] + frangi_vesselness_maps = [] + all_features = [] + + # FIXME: temporary for testing + selected_timepoints = np.linspace(0, T - 1, 3).astype(int) + for t in tqdm(selected_timepoints, desc="Processing timepoints"): + labels, vesselness, optimal_sigma = segment_zyx( + organelle_data[t], sigma_range=sigma_range, **frangi_params + ) + frangi_seg_masks.append(labels[0]) + frangi_vesselness_maps.append(vesselness[0]) + + # Extract features from this timepoint + features_df = extract_features_zyx( + labels_zyx=labels, + intensity_zyx=organelle_data[t], + frangi_zyx=vesselness, + spacing=(pixel_size_um, pixel_size_um), + extra_properties=[ + "aspect_ratio", + "circularity", + "frangi_intensity", + # "texture", + # "moments_hu", + ], + ) + + if not features_df.empty: + features_df["well_id"] = well_id + features_df["position_id"] = pos_id + features_df["timepoint"] = t + all_features.append(features_df) + + frangi_seg_masks = np.array(frangi_seg_masks) + frangi_vesselness_maps = np.array(frangi_vesselness_maps) + + # Save combined features + if all_features: + combined_features = pd.concat(all_features, ignore_index=True) + output_csv = output_root / f"features_{well_name}_{well_no}_{pos_id}.csv" + combined_features.to_csv(output_csv, index=False) + print(f" Saved {len(combined_features)} object features to {output_csv}") + + # Convert to output format (T_actual, C=1, Z, Y, X) + T_actual = frangi_seg_masks.shape[0] + out_data = frangi_seg_masks[:, :, :].astype(np.uint32) + print(f" Processed {T_actual} timepoints, output shape: {out_data.shape}") + + position_key = (well_name, well_no, pos_id) + +# %% + +viewer.add_image(organelle_data[selected_timepoints, 0]) +viewer.add_labels(frangi_seg_masks) + + +# %% +# Plot mitochondrial dynamics: elongation and fragmentation + +if all_features: + df = combined_features + + # Aggregate features per timepoint + timepoint_summary = ( + df.groupby("timepoint") + .agg( + { + "label": "count", # Number of mitochondrial objects + "area": ["mean", "median", "sum"], # Size metrics + "aspect_ratio": ["mean", "median"], # Elongation metric + "circularity": ["mean", "median"], # Shape metric + "frangi_mean_intensity": ["mean", "median"], # Tubularity metric + # "moments_hu_1": ["mean", "median"], # Shape descriptor + # "moments_hu_2": ["mean", "median"], # Shape descriptor + # "moments_hu_3": ["mean", "median"], # Shape descriptor + # "moments_hu_4": ["mean", "median"], # Shape descriptor + # "contrast": ["mean", "median"], # Texture metric + } + ) + .reset_index() + ) + + # Flatten column names + timepoint_summary.columns = [ + "_".join(col).strip("_") for col in timepoint_summary.columns.values + ] + + # Create figure with subplots + fig, axes = plt.subplots(2, 3, figsize=(15, 10)) + fig.suptitle( + f"Mitochondrial Dynamics: {well_id}/{pos_id}", fontsize=14, fontweight="bold" + ) + + # Plot 1: Number of objects (fragmentation indicator) + ax = axes[0, 0] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["label_count"], + marker="o", + linewidth=2, + markersize=8, + color="#1f77b4", + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Number of Objects", fontsize=11) + ax.set_title("Fragmentation (Object Count)", fontsize=12, fontweight="bold") + ax.grid(True, alpha=0.3) + + # Plot 2: Mean area per object + ax = axes[0, 1] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["area_mean"], + marker="o", + linewidth=2, + markersize=8, + color="#ff7f0e", + label="Mean", + ) + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["area_median"], + marker="s", + linewidth=2, + markersize=7, + color="#d62728", + label="Median", + alpha=0.7, + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Area (µm²)", fontsize=11) + ax.set_title("Mitochondrial Size", fontsize=12, fontweight="bold") + ax.legend() + ax.grid(True, alpha=0.3) + + # Plot 3: Total area (network coverage) + ax = axes[0, 2] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["area_sum"], + marker="o", + linewidth=2, + markersize=8, + color="#2ca02c", + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Total Area (µm²)", fontsize=11) + ax.set_title("Total Mitochondrial Coverage", fontsize=12, fontweight="bold") + ax.grid(True, alpha=0.3) + + # Plot 4: Aspect ratio (elongation) + ax = axes[1, 0] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["aspect_ratio_mean"], + marker="o", + linewidth=2, + markersize=8, + color="#9467bd", + label="Mean", + ) + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["aspect_ratio_median"], + marker="s", + linewidth=2, + markersize=7, + color="#8c564b", + label="Median", + alpha=0.7, + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Aspect Ratio", fontsize=11) + ax.set_title("Elongation (Aspect Ratio)", fontsize=12, fontweight="bold") + ax.legend() + ax.grid(True, alpha=0.3) + + # Plot 5: Circularity + ax = axes[1, 1] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["circularity_mean"], + marker="o", + linewidth=2, + markersize=8, + color="#e377c2", + label="Mean", + ) + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["circularity_median"], + marker="s", + linewidth=2, + markersize=7, + color="#7f7f7f", + label="Median", + alpha=0.7, + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Circularity", fontsize=11) + ax.set_title("Shape Circularity", fontsize=12, fontweight="bold") + ax.legend() + ax.grid(True, alpha=0.3) + + # Plot 6: Frangi vesselness (tubularity) + ax = axes[1, 2] + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["frangi_mean_intensity_mean"], + marker="o", + linewidth=2, + markersize=8, + color="#bcbd22", + label="Mean", + ) + ax.plot( + timepoint_summary["timepoint"], + timepoint_summary["frangi_mean_intensity_median"], + marker="s", + linewidth=2, + markersize=7, + color="#17becf", + label="Median", + alpha=0.7, + ) + ax.set_xlabel("Timepoint", fontsize=11) + ax.set_ylabel("Frangi Vesselness", fontsize=11) + ax.set_title("Tubularity (Frangi)", fontsize=12, fontweight="bold") + ax.legend() + ax.grid(True, alpha=0.3) + + plt.tight_layout() + + # Save figure + output_fig = output_root / f"dynamics_{well_name}_{well_no}_{pos_id}.png" + plt.savefig(output_fig, dpi=300, bbox_inches="tight") + print(f" Saved dynamics plot to {output_fig}") + + plt.show() + + # Print summary statistics + print(f"\n=== Mitochondrial Dynamics Summary ===") + print(f"Position: {well_id}/{pos_id}") + print(f"\nTimepoint range: {selected_timepoints[0]} -> {selected_timepoints[-1]}") + print(f"\nFragmentation (Object Count):") + print(f" Start: {timepoint_summary['label_count'].iloc[0]:.0f} objects") + print(f" End: {timepoint_summary['label_count'].iloc[-1]:.0f} objects") + print( + f" Change: {timepoint_summary['label_count'].iloc[-1] - timepoint_summary['label_count'].iloc[0]:+.0f} ({(timepoint_summary['label_count'].iloc[-1]/timepoint_summary['label_count'].iloc[0] - 1)*100:+.1f}%)" + ) + + print(f"\nElongation (Aspect Ratio):") + print(f" Start: {timepoint_summary['aspect_ratio_mean'].iloc[0]:.2f}") + print(f" End: {timepoint_summary['aspect_ratio_mean'].iloc[-1]:.2f}") + print( + f" Change: {timepoint_summary['aspect_ratio_mean'].iloc[-1] - timepoint_summary['aspect_ratio_mean'].iloc[0]:+.2f} ({(timepoint_summary['aspect_ratio_mean'].iloc[-1]/timepoint_summary['aspect_ratio_mean'].iloc[0] - 1)*100:+.1f}%)" + ) + + print(f"\nMean Object Size (Area):") + print(f" Start: {timepoint_summary['area_mean'].iloc[0]:.2f} µm²") + print(f" End: {timepoint_summary['area_mean'].iloc[-1]:.2f} µm²") + print( + f" Change: {timepoint_summary['area_mean'].iloc[-1] - timepoint_summary['area_mean'].iloc[0]:+.2f} µm² ({(timepoint_summary['area_mean'].iloc[-1]/timepoint_summary['area_mean'].iloc[0] - 1)*100:+.1f}%)" + ) + +# %% diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py b/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py new file mode 100644 index 000000000..be255520e --- /dev/null +++ b/applications/pseudotime_analysis/organelle_segmentation/segment_organelles.py @@ -0,0 +1,295 @@ +import logging + +import numpy as np +from numpy.typing import ArrayLike +from skimage import measure, morphology +from skimage.exposure import equalize_adapthist +from skimage.filters import frangi, threshold_otsu, threshold_triangle + +_logger = logging.getLogger("viscy") + + +def segment_zyx( + input_zyx: ArrayLike, + clahe_kernel_size=None, + clahe_clip_limit=0.01, + sigma_range=(0.5, 3.0), + sigma_steps=5, + auto_optimize_sigma=True, + frangi_alpha=0.5, + frangi_beta=0.5, + frangi_gamma=None, + threshold_method="otsu", + min_object_size=10, + apply_morphology=True, +): + """ + Segment mitochondria from a 2D or 3D input_zyx using CLAHE preprocessing, + Frangi filtering, and connected component labeling. + + Based on: + Lefebvre, A.E.Y.T., Sturm, G., Lin, TY. et al. + Nellie (2025) https://doi.org/10.1038/s41592-025-02612-7 + + Parameters + ---------- + input_zyx : ndarray + Input image with shape (Z, Y, X) for 3D. + If 2D, uses 2D Frangi filter. If 3D with Z=1, squeezes to 2D. + clahe_kernel_size : int or None + Kernel size for CLAHE (Contrast Limited Adaptive Histogram Equalization). + If None, automatically set to max(input_zyx.shape) // 8. + clahe_clip_limit : float + Clipping limit for CLAHE, normalized between 0 and 1 (default: 0.01). + sigma_range : tuple of float + Range of sigma values to test for Frangi filter (min_sigma, max_sigma). + Represents the scale of structures to detect. + sigma_steps : int + Number of sigma values to test in the range. + auto_optimize_sigma : bool + If True, automatically finds optimal sigma by maximizing vesselness response. + If False, uses all sigmas in range for multi-scale filtering. + frangi_alpha : float + Frangi filter sensitivity to plate-j like structures (2D) or blob-like (3D). + frangi_beta : float + Frangi filter sensitivity to blob-like structures (2D) or tube-like (3D). + frangi_gamma : float or None + Frangi filter sensitivity to background noise. If None, auto-computed. + threshold_method : str + Thresholding method: 'otsu', 'triangle', 'percentile', 'manual_X'. + min_object_size : int + Minimum object size in pixels for connected components. + apply_morphology : bool + If True, applies morphological closing to connect nearby structures. + + Returns + ------- + labels : ndarray + Instance segmentation labels with same dimensionality as input. + vesselness : ndarray + Filtered vesselness response with same dimensionality as input. + optimal_sigma : float or None + The optimal sigma value if auto_optimize_sigma=True, else None. + """ + + assert input_zyx.ndim == 3 + Z, Y, X = input_zyx.shape[-3:] + + if clahe_kernel_size is None: + clahe_kernel_size = max(Z, Y, X) // 8 + + # Apply CLAHE for contrast enhancement + enhanced_zyx = equalize_adapthist( + input_zyx, + kernel_size=clahe_kernel_size, + clip_limit=clahe_clip_limit, + ) + + # Generate sigma values + sigmas = np.linspace(sigma_range[0], sigma_range[1], sigma_steps) + + # Auto-optimize sigma or use multi-scale + if auto_optimize_sigma: + optimal_sigma, vesselness = _find_optimal_sigma( + enhanced_zyx, sigmas, frangi_alpha, frangi_beta, frangi_gamma + ) + else: + optimal_sigma = None + vesselness = _multiscale_frangi( + enhanced_zyx, sigmas, frangi_alpha, frangi_beta, frangi_gamma + ) + + # Threshold the vesselness response + if threshold_method == "otsu": + threshold = threshold_otsu(vesselness) + _logger.debug(f"Otsu threshold: {threshold:.4f}") + elif threshold_method == "triangle": + threshold = threshold_triangle(vesselness) + _logger.debug(f"Triangle threshold: {threshold:.4f}") + elif threshold_method == "nellie_min": + threshold_otsu_val = threshold_otsu(vesselness) + threshold_triangle_val = threshold_triangle(vesselness) + threshold = min(threshold_otsu_val, threshold_triangle_val) + _logger.debug( + f"Nellie-min threshold: otsu={threshold_otsu_val:.4f}, triangle={threshold_triangle_val:.4f}, using min={threshold:.4f}" + ) + elif threshold_method == "nellie_max": + threshold_otsu_val = threshold_otsu(vesselness) + threshold_triangle_val = threshold_triangle(vesselness) + threshold = max(threshold_otsu_val, threshold_triangle_val) + _logger.debug( + f"Nellie-max threshold: otsu={threshold_otsu_val:.4f}, triangle={threshold_triangle_val:.4f}, using max={threshold:.4f}" + ) + elif threshold_method == "percentile": + # Use percentile-based threshold (good for sparse features) + threshold = np.percentile(vesselness[vesselness > 0], 95) # Keep top 5% + _logger.debug(f"Percentile (95th) threshold: {threshold:.4f}") + elif threshold_method.startswith("manual_"): + # Manual threshold: "manual_0.05" means threshold at 0.05 + threshold = float(threshold_method.split("_")[1]) + _logger.debug(f"Manual threshold: {threshold:.4f}") + else: + raise ValueError(f"Unknown threshold method: {threshold_method}") + + binary_mask = vesselness > threshold + + _logger.debug( + f" Selected {binary_mask.sum()} / {binary_mask.size} pixels ({100*binary_mask.sum()/binary_mask.size:.2f}%)" + ) + + # Apply morphological operations + if apply_morphology: + binary_mask = morphology.binary_closing( + binary_mask, footprint=morphology.ball(1) + ) + binary_mask = morphology.remove_small_holes(binary_mask, area_threshold=64) + + # Label connected components + labels = measure.label(binary_mask, connectivity=2) + + # Remove small objects + labels = morphology.remove_small_objects(labels, min_size=min_object_size) + + if Z == 1: + labels = labels[np.newaxis, ...] + vesselness = vesselness[np.newaxis, ...] + + return labels, vesselness, optimal_sigma + + +def _find_optimal_sigma(input_zyx, sigmas, alpha, beta, gamma): + """ + Find the optimal sigma that maximizes the vesselness response. + + Parameters + ---------- + input_zyx : ndarray + 3D input_zyx (Z, Y, X). + sigmas : array-like + Sigma values to test. + alpha, beta, gamma : float + Frangi filter parameters. + + Returns + ------- + optimal_sigma : float + The sigma with the highest mean vesselness response. + vesselness : ndarray + The vesselness response using optimal sigma. + """ + best_sigma = sigmas[0] + best_score = -np.inf + best_vesselness = None + + if input_zyx.shape[0] == 1: + input_zyx = input_zyx[0] + + for sigma in sigmas: + vessel = frangi( + input_zyx, + sigmas=[sigma], + alpha=alpha, + beta=beta, + gamma=gamma, + black_ridges=False, + ) + + # Score is the mean of top 10% vesselness values + score = np.mean( + np.partition(vessel.ravel(), -int(0.1 * vessel.size))[ + -int(0.1 * vessel.size) : + ] + ) + + if score > best_score: + best_score = score + best_sigma = sigma + best_vesselness = vessel + + if input_zyx.shape[0] == 1: + best_vesselness = best_vesselness[np.newaxis, ...] + + return best_sigma, best_vesselness + + +def _multiscale_frangi( + input_zyx, sigmas: ArrayLike, alpha: float, beta: float, gamma: float +): + """ + Apply Frangi filter at multiple scales and return the maximum response. + + Parameters + ---------- + input_zyx : ndarray + 3D input_zyx (Z, Y, X). + sigmas : array-like + Sigma values for multi-scale filtering. + alpha, beta, gamma : float + Frangi filter parameters. + + Returns + ------- + vesselness : ndarray + Maximum vesselness response across all scales. + """ + if input_zyx.shape[0] == 1: + input_zyx = input_zyx[0] + vesselness = frangi( + input_zyx, + sigmas=sigmas, + alpha=alpha, + beta=beta, + gamma=gamma, + black_ridges=False, + ) + if input_zyx.shape[0] == 1: + vesselness = vesselness[np.newaxis, ...] + return vesselness + + +def calculate_nellie_sigmas( + min_radius_um, max_radius_um, pixel_size_um, num_sigma=5, min_step_size_px=0.2 +): + """ + Calculate sigma values following Nellie's approach. + + Parameters + ---------- + min_radius_um : float + Minimum structure radius in micrometers (e.g., 0.2 for diffraction limit) + max_radius_um : float + Maximum structure radius in micrometers (e.g., 1.0 for thick tubules) + pixel_size_um : float + Pixel size in micrometers + num_sigma : int + Target number of sigma values + min_step_size_px : float + Minimum step size between sigmas in pixels + + Returns + ------- + tuple : (sigma_min, sigma_max) + Sigma range in pixels + """ + min_radius_px = min_radius_um / pixel_size_um + max_radius_px = max_radius_um / pixel_size_um + + # Nellie uses radius/2 to radius/3 as sigma + sigma_1 = min_radius_px / 2 + sigma_2 = max_radius_px / 3 + sigma_min = min(sigma_1, sigma_2) + sigma_max = max(sigma_1, sigma_2) + + # Calculate step size with minimum constraint + sigma_step_calculated = (sigma_max - sigma_min) / num_sigma + sigma_step = max(min_step_size_px, sigma_step_calculated) + + sigmas = list(np.arange(sigma_min, sigma_max + sigma_step, sigma_step)) + + _logger.debug(f" Nellie-style sigmas: {sigma_min:.3f} to {sigma_max:.3f} pixels") + _logger.debug( + f" Radius range: {min_radius_um:.3f}-{max_radius_um:.3f} µm = {min_radius_px:.2f}-{max_radius_px:.2f} pixels" + ) + _logger.debug(f" Sigma values: {[f'{s:.2f}' for s in sigmas]}") + + return (sigma_min, sigma_max) From 0d5a46ab6ebbbe6fea515c07bd05e7c3eb6b7f17 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Mon, 13 Oct 2025 19:34:16 -0700 Subject: [PATCH 2/4] update folder output for demo file --- .../pseudotime_analysis/organelle_segmentation/segment_mito.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py index bffe9d1c0..4f68ca9f7 100644 --- a/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py +++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito.py @@ -34,7 +34,7 @@ output_root = ( Path( - "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/infection_state/output" + "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output" ) / input_path.stem ) From 6d2e5c9d1c6032d5e3815d55116284e4e49735c0 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Mon, 13 Oct 2025 20:32:24 -0700 Subject: [PATCH 3/4] code to generate the pandas dataframe per position --- .../organelle_segmentation/segment_mito_sc.py | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py new file mode 100644 index 000000000..202251ac0 --- /dev/null +++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py @@ -0,0 +1,164 @@ +# %% +import os +from pathlib import Path + +import numpy as np +import pandas as pd +from extract_features import ( + extract_features_zyx, +) +from iohub import open_ome_zarr +from matplotlib import pyplot as plt +from segment_organelles import ( + calculate_nellie_sigmas, + segment_zyx, +) +from skimage.exposure import rescale_intensity +from tqdm import tqdm + +# %% + +input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr" +input_zarr = open_ome_zarr(input_path, mode="r") +in_chans = input_zarr.channel_names +organelle_channel = "GFP EX488 EM525-45" + +org_seg_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/1-predictions/quantify_remodeling/segmentation_TOMM20_filaments.zarr" +seg_zarr = open_ome_zarr(org_seg_path, mode="r") +seg_chans = seg_zarr.channel_names +organelle_labels = "Organelle_mask" + +nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/1-preprocess/label-free/3-track/2024_11_21_A549_TOMM20_DENV_cropped.zarr" +nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r") +nuclear_labels_chans = nuclear_labels_zarr.channel_names +nuclear_labels = "nuclei_prediction_labels_labels" + +output_root = Path( + "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output" +) +output_csv = output_root / "2024_11_21_A549_TOMM20_DENV/organelle_seg_features.csv" + + +# %% library of feature extraction methods +def get_patch(data, cell_centroid, PATCH_SIZE): + + x_centroid, y_centroid = cell_centroid + # ensure patch boundaries stay within data dimensions + x_start = max(0, x_centroid - PATCH_SIZE // 2) + x_end = min(data.shape[1], x_centroid + PATCH_SIZE // 2) + y_start = max(0, y_centroid - PATCH_SIZE // 2) + y_end = min(data.shape[0], y_centroid + PATCH_SIZE // 2) + + # get patch of PATCH_SIZE centered on centroid + patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)] + return patch + + +# %% +PATCH_SIZE = 160 +frangi_params = { + "clahe_clip_limit": 0.01, # Mild contrast enhancement (0.01-0.03 range) + "sigma_steps": 2, # Multiple scales to capture size variation + "auto_optimize_sigma": False, # Use multi-scale (max across scales) + "frangi_alpha": 0.5, # Standard tubular structure sensitivity + "frangi_beta": 0.5, # Standard blob rejection + "threshold_method": "nellie_max", # CRITICAL: Manual threshold where auto methods fail + "min_object_size": 10, # Remove small noise clusters (20-50 pixels) + "apply_morphology": False, # Connect fragmented mitochondria +} + + +for well_id, well_data in tqdm(seg_zarr.wells(), desc="Wells", position=0): + # well_id, well_data = next(seg_zarr.wells()) + well_name, well_no = well_id.split("/") + # if well_id in wells_to_process: + for pos_id, pos_data in tqdm( + well_data.positions(), desc=f"Positions in {well_id}", position=1, leave=False + ): + # pos_id, pos_data = next(well_data.positions()) + T, C, Z, Y, X = pos_data.data.shape + seg_data = pos_data.data.numpy() + org_seg_mask = seg_data[:, seg_chans.index(organelle_labels)] + + # read the csv stored in each nucl seg zarr folder + file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv" + nuclear_labels_csv = os.path.join( + nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name + ) + nuclear_labels_df = pd.read_csv(nuclear_labels_csv) + + in_data = input_zarr[well_id + "/" + pos_id].data.numpy() + scale_um = input_zarr[well_id + "/" + pos_id].scale + organelle_data = in_data[:, in_chans.index(organelle_channel)] + + # Initialize an empty list to store values from each row of the csv + position_features = [] + for idx, row in nuclear_labels_df.iterrows(): + + if ( + row["x"] > PATCH_SIZE // 2 + and row["y"] > PATCH_SIZE // 2 + and row["x"] < X - PATCH_SIZE // 2 + and row["y"] < Y - PATCH_SIZE // 2 + ): + cell_centroid = row["x"], row["y"] + + timepoint = row["t"] + organelle_patch = get_patch( + organelle_data[int(timepoint), 0], cell_centroid, PATCH_SIZE + ) + + # + organelle_patch = organelle_patch[np.newaxis] + organelle_patch = rescale_intensity( + organelle_patch, + out_range=(0, 1), + ) + # FIXME set some defualt for the mitochondria and other organelles + min_radius_um = 0.15 # 300 nm diameter = ~2 pixels + max_radius_um = 0.6 # 1 µm diameter = ~6.7 pixels + sigma_range = calculate_nellie_sigmas( + min_radius_um, + max_radius_um, + scale_um[-1], # use the highest res axis (XY) + num_sigma=frangi_params["sigma_steps"], + ) + + labels, vesselness, optimal_sigma = segment_zyx( + organelle_patch, sigma_range=sigma_range, **frangi_params + ) + features_df = extract_features_zyx( + labels_zyx=labels, + intensity_zyx=organelle_patch, + frangi_zyx=vesselness, + spacing=(scale_um[-1], scale_um[-1]), + extra_properties=[ + "aspect_ratio", + "circularity", + "frangi_intensity", + "texture", + "moments_hu", + ], + ) + + if not features_df.empty: + features_df["fov_name"] = "/" + well_id + "/" + pos_id + features_df["track_id"] = row["track_id"] + features_df["t"] = timepoint + features_df["x"] = row["x"] + features_df["y"] = row["y"] + + position_features.append(features_df) + + if position_features: + # Concatenate the list of DataFrames + position_df = pd.concat(position_features, ignore_index=True) + position_df.to_csv( + output_csv, + mode="a", + header=not os.path.exists(output_csv), + index=False, + ) + + +# %% From c2160c6cccc3e8976e0b4c4c77d8b40a3f9eb19a Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 14 Oct 2025 11:02:24 -0700 Subject: [PATCH 4/4] parallelized segment mito features --- .../organelle_segmentation/segment_mito_sc.py | 293 ++++++++++-------- 1 file changed, 168 insertions(+), 125 deletions(-) diff --git a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py index 202251ac0..df60ebde8 100644 --- a/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py +++ b/applications/pseudotime_analysis/organelle_segmentation/segment_mito_sc.py @@ -8,6 +8,7 @@ extract_features_zyx, ) from iohub import open_ome_zarr +from joblib import Parallel, delayed from matplotlib import pyplot as plt from segment_organelles import ( calculate_nellie_sigmas, @@ -16,28 +17,6 @@ from skimage.exposure import rescale_intensity from tqdm import tqdm -# %% - -input_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/train-test/2024_11_21_A549_TOMM20_DENV.zarr" -input_zarr = open_ome_zarr(input_path, mode="r") -in_chans = input_zarr.channel_names -organelle_channel = "GFP EX488 EM525-45" - -org_seg_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/4-phenotyping/1-predictions/quantify_remodeling/segmentation_TOMM20_filaments.zarr" -seg_zarr = open_ome_zarr(org_seg_path, mode="r") -seg_chans = seg_zarr.channel_names -organelle_labels = "Organelle_mask" - -nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2024_11_21_A549_TOMM20_DENV/1-preprocess/label-free/3-track/2024_11_21_A549_TOMM20_DENV_cropped.zarr" -nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r") -nuclear_labels_chans = nuclear_labels_zarr.channel_names -nuclear_labels = "nuclei_prediction_labels_labels" - -output_root = Path( - "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output" -) -output_csv = output_root / "2024_11_21_A549_TOMM20_DENV/organelle_seg_features.csv" - # %% library of feature extraction methods def get_patch(data, cell_centroid, PATCH_SIZE): @@ -54,111 +33,175 @@ def get_patch(data, cell_centroid, PATCH_SIZE): return patch -# %% -PATCH_SIZE = 160 -frangi_params = { - "clahe_clip_limit": 0.01, # Mild contrast enhancement (0.01-0.03 range) - "sigma_steps": 2, # Multiple scales to capture size variation - "auto_optimize_sigma": False, # Use multi-scale (max across scales) - "frangi_alpha": 0.5, # Standard tubular structure sensitivity - "frangi_beta": 0.5, # Standard blob rejection - "threshold_method": "nellie_max", # CRITICAL: Manual threshold where auto methods fail - "min_object_size": 10, # Remove small noise clusters (20-50 pixels) - "apply_morphology": False, # Connect fragmented mitochondria -} - - -for well_id, well_data in tqdm(seg_zarr.wells(), desc="Wells", position=0): - # well_id, well_data = next(seg_zarr.wells()) +def process_position( + well_id, + pos_id, + input_path, + nuclear_labels_path, + organelle_channel, + patch_size, + frangi_params, +): + """Process a single position and return the features DataFrame.""" + # Open zarr stores (each worker needs its own file handles) + input_zarr = open_ome_zarr(input_path, mode="r") + well_name, well_no = well_id.split("/") - # if well_id in wells_to_process: - for pos_id, pos_data in tqdm( - well_data.positions(), desc=f"Positions in {well_id}", position=1, leave=False - ): - # pos_id, pos_data = next(well_data.positions()) - T, C, Z, Y, X = pos_data.data.shape - seg_data = pos_data.data.numpy() - org_seg_mask = seg_data[:, seg_chans.index(organelle_labels)] - - # read the csv stored in each nucl seg zarr folder - file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv" - nuclear_labels_csv = os.path.join( - nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name - ) - nuclear_labels_df = pd.read_csv(nuclear_labels_csv) - - in_data = input_zarr[well_id + "/" + pos_id].data.numpy() - scale_um = input_zarr[well_id + "/" + pos_id].scale - organelle_data = in_data[:, in_chans.index(organelle_channel)] - - # Initialize an empty list to store values from each row of the csv - position_features = [] - for idx, row in nuclear_labels_df.iterrows(): - - if ( - row["x"] > PATCH_SIZE // 2 - and row["y"] > PATCH_SIZE // 2 - and row["x"] < X - PATCH_SIZE // 2 - and row["y"] < Y - PATCH_SIZE // 2 - ): - cell_centroid = row["x"], row["y"] - - timepoint = row["t"] - organelle_patch = get_patch( - organelle_data[int(timepoint), 0], cell_centroid, PATCH_SIZE - ) - - # - organelle_patch = organelle_patch[np.newaxis] - organelle_patch = rescale_intensity( - organelle_patch, - out_range=(0, 1), - ) - # FIXME set some defualt for the mitochondria and other organelles - min_radius_um = 0.15 # 300 nm diameter = ~2 pixels - max_radius_um = 0.6 # 1 µm diameter = ~6.7 pixels - sigma_range = calculate_nellie_sigmas( - min_radius_um, - max_radius_um, - scale_um[-1], # use the highest res axis (XY) - num_sigma=frangi_params["sigma_steps"], - ) - - labels, vesselness, optimal_sigma = segment_zyx( - organelle_patch, sigma_range=sigma_range, **frangi_params - ) - features_df = extract_features_zyx( - labels_zyx=labels, - intensity_zyx=organelle_patch, - frangi_zyx=vesselness, - spacing=(scale_um[-1], scale_um[-1]), - extra_properties=[ - "aspect_ratio", - "circularity", - "frangi_intensity", - "texture", - "moments_hu", - ], - ) - - if not features_df.empty: - features_df["fov_name"] = "/" + well_id + "/" + pos_id - features_df["track_id"] = row["track_id"] - features_df["t"] = timepoint - features_df["x"] = row["x"] - features_df["y"] = row["y"] - - position_features.append(features_df) - - if position_features: - # Concatenate the list of DataFrames - position_df = pd.concat(position_features, ignore_index=True) - position_df.to_csv( - output_csv, - mode="a", - header=not os.path.exists(output_csv), - index=False, + + # Load position data + position = input_zarr[well_id + "/" + pos_id] + T, C, Z, Y, X = position.data.shape + channel_names = position.channel_names + scale_um = position.scale + + in_data = position.data.numpy() + + # Read the csv stored in each nucl seg zarr folder + file_name = "tracks_" + well_name + "_" + well_no + "_" + pos_id + ".csv" + nuclear_labels_csv = os.path.join( + nuclear_labels_path, well_id + "/" + pos_id + "/" + file_name + ) + nuclear_labels_df = pd.read_csv(nuclear_labels_csv) + organelle_data = in_data[:, channel_names.index(organelle_channel)] + + # Initialize an empty list to store values from each row of the csv + position_features = [] + for idx, row in nuclear_labels_df.iterrows(): + + if ( + row["x"] > patch_size // 2 + and row["y"] > patch_size // 2 + and row["x"] < X - patch_size // 2 + and row["y"] < Y - patch_size // 2 + ): + cell_centroid = row["x"], row["y"] + + timepoint = row["t"] + organelle_patch = get_patch( + organelle_data[int(timepoint), 0], cell_centroid, patch_size ) + organelle_patch = organelle_patch[np.newaxis] + organelle_patch = rescale_intensity( + organelle_patch, + out_range=(0, 1), + ) + min_radius_um = 0.15 # 300 nm diameter = ~2 pixels + max_radius_um = 0.6 # 1 µm diameter = ~6.7 pixels + sigma_range = calculate_nellie_sigmas( + min_radius_um, + max_radius_um, + scale_um[-1], # use the highest res axis (XY) + num_sigma=frangi_params["sigma_steps"], + ) + + labels, vesselness, optimal_sigma = segment_zyx( + organelle_patch, sigma_range=sigma_range, **frangi_params + ) + features_df = extract_features_zyx( + labels_zyx=labels, + intensity_zyx=organelle_patch, + frangi_zyx=vesselness, + spacing=(scale_um[-1], scale_um[-1]), + extra_properties=[ + "aspect_ratio", + "circularity", + "frangi_intensity", + "texture", + "moments_hu", + ], + ) + + if not features_df.empty: + features_df["fov_name"] = well_id + "/" + pos_id + features_df["track_id"] = row["track_id"] + features_df["t"] = timepoint + features_df["x"] = row["x"] + features_df["y"] = row["y"] + + position_features.append(features_df) + + if position_features: + # Concatenate the list of DataFrames + position_df = pd.concat(position_features, ignore_index=True) + return position_df + else: + return pd.DataFrame() + + input_zarr.close() + # nuclear_labels_zarr.close() + # %% +if __name__ == "__main__": + input_path = Path( + "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/4-phenotyping/train-test/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV.zarr" + ) + input_zarr = open_ome_zarr(input_path, mode="r") + in_chans = input_zarr.channel_names + organelle_channel = "GFP EX488 EM525-45" + + nuclear_labels_path = "/hpc/projects/intracellular_dashboard/organelle_dynamics/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV/1-preprocess/label-free/3-track/2025_07_24_A549_SEC61_TOMM20_G3BP1_ZIKV_cropped.zarr" + # nuclear_labels_zarr = open_ome_zarr(nuclear_labels_path, mode="r") + # nuclear_labels_chans = nuclear_labels_zarr.channel_names + # nuclear_labels = "nuclei_prediction_labels_labels" + + output_root = ( + Path( + "/home/eduardo.hirata/repos/viscy/applications/pseudotime_analysis/organelle_segmentation/output" + ) + / input_path.stem + ) + output_root.mkdir(parents=True, exist_ok=True) + + output_csv = output_root / f"{input_path.stem}_mito_features.csv" + + PATCH_SIZE = 160 + frangi_params = { + "clahe_clip_limit": 0.01, # Mild contrast enhancement (0.01-0.03 range) + "sigma_steps": 2, # Multiple scales to capture size variation + "auto_optimize_sigma": False, # Use multi-scale (max across scales) + "frangi_alpha": 0.5, # Standard tubular structure sensitivity + "frangi_beta": 0.5, # Standard blob rejection + "threshold_method": "nellie_max", # CRITICAL: Manual threshold where auto methods fail + "min_object_size": 10, # Remove small noise clusters (20-50 pixels) + "apply_morphology": False, # Connect fragmented mitochondria + } + + # Number of parallel jobs - adjust based on your system + # -1 means use all available cores, or set to a specific number + n_jobs = -1 + + # Collect all (well_id, pos_id) pairs to process + with open_ome_zarr(input_path, mode="r") as input_zarr: + positions_to_process = [] + for well_id, well_data in input_zarr.wells(): + for pos_id, pos_data in well_data.positions(): + positions_to_process.append((well_id, pos_id)) + + print( + f"Processing {len(positions_to_process)} positions using {n_jobs} parallel jobs..." + ) + + # Process positions in parallel + results = Parallel(n_jobs=n_jobs, verbose=10)( + delayed(process_position)( + well_id=well_id, + pos_id=pos_id, + input_path=input_path, + nuclear_labels_path=nuclear_labels_path, + organelle_channel=organelle_channel, + patch_size=PATCH_SIZE, + frangi_params=frangi_params, + ) + for well_id, pos_id in positions_to_process + ) + + # Combine all results and save + all_features = [df for df in results if not df.empty] + if all_features: + final_df = pd.concat(all_features, ignore_index=True) + # Save all at once instead of appending + output_csv.parent.mkdir(parents=True, exist_ok=True) + final_df.to_csv(output_csv, index=False) + print(f"Saved {len(final_df)} features to {output_csv}")