From 9392298beabfb04b73234f66c02a3640124e1556 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 09:23:53 -0800 Subject: [PATCH 01/14] initial commit --- examples/DynaCLR/vcp_tutorials/README.md | 17 ++ examples/DynaCLR/vcp_tutorials/quickstart.py | 271 +++++++++++++++++++ 2 files changed, 288 insertions(+) create mode 100644 examples/DynaCLR/vcp_tutorials/README.md create mode 100644 examples/DynaCLR/vcp_tutorials/quickstart.py diff --git a/examples/DynaCLR/vcp_tutorials/README.md b/examples/DynaCLR/vcp_tutorials/README.md new file mode 100644 index 000000000..d5c46ad33 --- /dev/null +++ b/examples/DynaCLR/vcp_tutorials/README.md @@ -0,0 +1,17 @@ +# Virtual Cell Platform Tutorials + +This directory contains tutorial notebooks for the Virtual Cell Platform, +available as both Python scripts and Jupyter notebooks. + +- [Quick Start](quickstart.ipynb): +get started with model inference in Python with an A549 cell dataset. + +## Development + +The development happens on the Python scripts, +which are converted to Jupyter notebooks with: + +```sh +# TODO: change the file name at the end to be the script to convert +jupytext --to ipynb --update-metadata '{"jupytext":{"cell_metadata_filter":"all"}}' --update quickstart.py +``` diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py new file mode 100644 index 000000000..edbd64229 --- /dev/null +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -0,0 +1,271 @@ +# %% [markdown] +""" +# Quick Start: DynaCLR (Cell Dynamics Contrastive Learning of Representations) + +**Estimated time to complete:** 15 minutes +""" + +# %% [markdown] +""" +# Learning Goals + +* Download the DynaCLR model and run it on an example dataset +* Visualize the learned embeddings +""" + +# %% [markdown] +""" +# Prerequisites +Python>=3.11 + +""" + +# %% [markdown] +""" +# Introduction + +## Model +The DynaCLR model architecture consists of three main components designed to 
map 3D multi-channel patches of single cells to a temporally regularized embedding space. + +## Example Dataset + +The A549 example dataset used in this quick-start guide contains +quantitative phase and paired fluorescence images of a viral sensor reporter. +It is stored in OME-Zarr format and can be downloaded from +[here](https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/). + +It has pre-computed statistics for normalization, generated using the `viscy preprocess` CLI. + +Refer to our [preprint](https://arxiv.org/abs/2410.11281) for more details +about how the dataset and model were generated. + +## User Data + +The DynaCLR-DENV-VS+Ph model only requires label-free (quantitative phase) and fluorescence images for inference. + +To run inference on your own data (Experimental): +- Convert the label-free images into the OME-Zarr data format using iohub or other +[tools](https://ngff.openmicroscopy.org/tools/index.html#file-conversion), +- Run [pre-processing](https://github.com/mehta-lab/VisCy/blob/main/docs/usage.md#preprocessing) +with the `viscy preprocess` CLI +- Generate pseudo-tracks or tracking data from [Ultrack](https://github.com/royerlab/ultrack) +""" + +# %% [markdown] +""" +# Setup + +The commands below will install the required packages and download the example dataset and model checkpoint. +It may take a few minutes to download all the files. + +## Setup Google Colab + +To run this quick-start guide using Google Colab, +choose the 'T4' GPU runtime from the "Connect" dropdown menu +in the upper-right corner of this notebook for faster execution. +Using a GPU significantly speeds up running model inference, but CPU compute can also be used. + +## Setup Local Environment + +The commands below assume a Unix-like shell with `wget` installed. +On Windows, the files can be downloaded manually from the URLs. 
+""" + +# %% +# Install VisCy with the optional dependencies for this example +# See the [repository](https://github.com/mehta-lab/VisCy) for more details +# !pip install "viscy[metrics,visual]==0.4.0a3" + +# %% +# restart kernel if running in Google Colab +if "get_ipython" in globals(): + session = get_ipython() # noqa: F821 + if "google.colab" in str(session): + print("Shutting down colab session.") + session.kernel.do_shutdown(restart=True) + +# %% +# Validate installation +# !viscy --help + +# %% +# Download the example tracks data +# !wget -m -np -nH --cut-dirs=7 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/" +# Download the example registered timelapse data +# !wget -m -np -nH --cut-dirs=7 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/" +# Download the model checkpoint +# !wget -m -np -nH --cut-dirs=5 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt" + +# %% [markdown] +""" +# Run Model Inference + +The following code will run inference on a single field of view (FOV) of the example dataset. +This can also be achieved by using the VisCy CLI. +""" + +# %% +# %% +from pathlib import Path # noqa: E402 + +from iohub import open_ome_zarr # noqa: E402 +from torchview import draw_graph # noqa: E402 + +from viscy.data.triplet import TripletDataModule # noqa: E402 +from viscy.trainer import VisCyTrainer # noqa: E402 +from viscy.transforms import NormalizeSampled # noqa: E402 +from viscy.representation.embedding_writer import EmbeddingWriter # noqa: E402 +from viscy.representation.engine import ContrastiveModule # noqa: E402 +from anndata import read_zarr + +# %% +# NOTE: Nothing needs to be changed in this code block for the example to work. +# If using your own data, please modify the paths below. 
+ +# TODO: Set download paths, by default the working directory is used +root_dir = Path() +# TODO: modify the path to the input dataset +input_data_path = root_dir / "registered_test.zarr" +# TODO: modify the path to the track dataset +tracks_path= root_dir/ "track_test.zarr" +# TODO: modify the path to the model checkpoint +model_ckpt_path = root_dir / "epoch=94-step=2375.ckpt" +#TODO" modify the path to load the extracted infected cell annotation +annotations_path = root_dir / "extracted_inf_state.csv" + +# TODO: modify the path to save the predictions +output_path = root_dir / "dynaclr_prediction.zarr" + +#%% +# NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks +fov_name_mock = "/A/3/9" +track_id_mock = [19] +fov_name_inf = "/B/4/9" +track_id_inf = [42] + +# Default parameters for the test dataset +z_range = (24, 29) +yx_patch_size = (160, 160) +channels_to_display = ["Phase3D", "RFP"] + +# %% +# Configure the data module for loading example images in prediction mode. +# See API documentation for how to use it with a different dataset. 
+# For example, View the documentation for the HCSDataModule class by running: +# ?HCSDataModule + +# %% +# Setup the data module to use the example dataset +datamodule = TripletDataModule( + data_path=input_data_path, + tracks_path=tracks_path, + source_channel=channels_to_display, + z_range=z_range, + initial_yx_patch_size=yx_patch_size, + final_yx_patch_size=yx_patch_size, + predict_cells=True, + batch_size=1, +) +datamodule.setup("predict") + +# %% +# Load the DynaCLR checkpoint from the downloaded checkpoint +# See this module for options to configure the model: + +# ?contrastive.ContrastiveEncoder + +# %% +dynaclr_model = ContrastiveEncoder.load_from_checkpoint( + model_ckpt_path, # checkpoint path + model_config={ + backbone: 'convnext_tiny', + in_channels: len(channels_to_display), + in_stack_depth: z_range, + stem_kernel_size: (5,4,4), + stem_stride:(5,4,4), + embedding_dim: 768, + projection_dim: 32, + drop_path_rate: 0.0, + }, +) + +# %% +# Visualize the model graph +model_graph = draw_graph( + dynaclr_model, + torch.ones((1,2,5,256,256), + graph_name="DynaCLR", + roll=True, + depth=3, + expand_nested=True, +) + +model_graph.visual_graph + +# %% +# Setup the trainer for prediction +# The trainer can be further configured to better utilize the available hardware, +# For example using GPUs and half precision. +# Callbacks can also be used to customize logging and prediction writing. +# See the API documentation for more details: +# ?VisCyTrainer + +# %% +# Initialize the trainer +# The prediction writer callback will save the predictions to an OME-Zarr store +trainer = VisCyTrainer(callbacks=[EmbeddingWriter(output_path, pca_kwargs={"n_components":8})]) + +# Run prediction +trainer.predict(model=dynaclr_model, datamodule=data_module, return_predictions=False) + +# %% [markdown] +""" +# Model Outputs + +The model outputs are also stored in an ANNData. 
The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA) +""" + +embeddings_anndata = read_zarr(output_path) +annotations = pd.read_csv(annotations_path) + +# %% +def get_patch(data, cell_centroid, patch_size): + """Extract patch centered on cell centroid across all channels. + + Parameters + ---------- + data : ndarray + Image data with shape (C, Y, X) or (Y, X) + cell_centroid : tuple + (y, x) coordinates of cell centroid + patch_size : int + Size of the square patch to extract + + Returns + ------- + ndarray + Extracted patch with shape (C, patch_size, patch_size) or (patch_size, patch_size) + """ + y_centroid, x_centroid = cell_centroid + x_start = max(0, x_centroid - patch_size // 2) + x_end = min(data.shape[-1], x_centroid + patch_size // 2) + y_start = max(0, y_centroid - patch_size // 2) + y_end = min(data.shape[-2], y_centroid + patch_size // 2) + + if data.ndim == 3: # CYX format + patch = data[:, int(y_start) : int(y_end), int(x_start) : int(x_end)] + else: # YX format + patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)] + return patch + + +# %% [markdown] +""" +## Responsible Use + +We are committed to advancing the responsible development and use of artificial intelligence. +Please follow our [Acceptable Use Policy](https://virtualcellmodels.cziscience.com/acceptable-use-policy) when engaging with our services. + +Should you have any security or privacy issues or questions related to the services, +please reach out to our team at [security@chanzuckerberg.com](mailto:security@chanzuckerberg.com) or [privacy@chanzuckerberg.com](mailto:privacy@chanzuckerberg.com) respectively. 
+""" From 741482e5ef7bb56fe3e19bed0762c3978a26c691 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 10:58:42 -0800 Subject: [PATCH 02/14] add plotting --- examples/DynaCLR/vcp_tutorials/quickstart.py | 125 ++++++++++++++++--- 1 file changed, 108 insertions(+), 17 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index edbd64229..696500378 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -90,11 +90,11 @@ # %% # Download the example tracks data -# !wget -m -np -nH --cut-dirs=7 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/" +# !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/" # Download the example registered timelapse data -# !wget -m -np -nH --cut-dirs=7 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/" +# !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/" # Download the model checkpoint -# !wget -m -np -nH --cut-dirs=5 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt" +# !wget -m -np -nH --cut-dirs=5 "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt" # %% [markdown] """ @@ -116,7 +116,10 @@ from viscy.transforms import NormalizeSampled # noqa: E402 from viscy.representation.embedding_writer import EmbeddingWriter # noqa: E402 from viscy.representation.engine import ContrastiveModule # noqa: E402 -from anndata import read_zarr +from anndata import 
read_zarr # noqa: E402 +import matplotlib.pyplot as plt # noqa: E402 +import seaborn as sns # noqa: E402 +import pandas as pd # noqa: E402 # %% # NOTE: Nothing needs to be changed in this code block for the example to work. @@ -137,14 +140,8 @@ output_path = root_dir / "dynaclr_prediction.zarr" #%% -# NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks -fov_name_mock = "/A/3/9" -track_id_mock = [19] -fov_name_inf = "/B/4/9" -track_id_inf = [42] - # Default parameters for the test dataset -z_range = (24, 29) +z_range = (15, 45) yx_patch_size = (160, 160) channels_to_display = ["Phase3D", "RFP"] @@ -164,7 +161,7 @@ initial_yx_patch_size=yx_patch_size, final_yx_patch_size=yx_patch_size, predict_cells=True, - batch_size=1, + batch_size=8, ) datamodule.setup("predict") @@ -193,7 +190,7 @@ # Visualize the model graph model_graph = draw_graph( dynaclr_model, - torch.ones((1,2,5,256,256), + torch.ones((1,2,30,256,256), graph_name="DynaCLR", roll=True, depth=3, @@ -213,10 +210,10 @@ # %% # Initialize the trainer # The prediction writer callback will save the predictions to an OME-Zarr store -trainer = VisCyTrainer(callbacks=[EmbeddingWriter(output_path, pca_kwargs={"n_components":8})]) +trainer = VisCyTrainer(callbacks=[EmbeddingWriter(output_path, pca_kwargs={"n_components":8}, phate_kwargs={"knn":5, "decay":40,"n_jobs":-1})]) # Run prediction -trainer.predict(model=dynaclr_model, datamodule=data_module, return_predictions=False) +trainer.predict(model=dynaclr_model, datamodule=datamodule, return_predictions=False) # %% [markdown] """ @@ -225,10 +222,84 @@ The model outputs are also stored in an ANNData. The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA) """ -embeddings_anndata = read_zarr(output_path) -annotations = pd.read_csv(annotations_path) +# NOTE: We have chosen these tracks to be representative of the data. 
Feel free to open the dataset and select other tracks +features_anndata = read_zarr(output_path) +annotation = pd.read_csv(annotations_path) +ANNOTATION_COLUMN = 'infection-state' + +# Combine embeddings and annotations +annotation["fov_name"] = annotation["fov_name"].str.strip("/") +annotation["fov_name"] = annotation["fov_name"].str.strip("/") + +annotation = annotation.set_index(["fov_name", "id"]) + +mi = pd.MultiIndex.from_arrays( + [features_anndata.obs["fov_name"], features_anndata.obs["id"]], names=["fov_name", "id"] +) +features_anndata.obs['annotations_infections_state'] = annotation.reindex(mi)[ANNOTATION_COLUMN] + +# Plot the PCA and PHATE embeddings colored by infection state +# Prepare data for plotting +plot_df = pd.DataFrame({ + 'PC1': features_anndata.obsm['X_pca'][:, 0], + 'PC2': features_anndata.obsm['X_pca'][:, 1], + 'PHATE1': features_anndata.obsm['X_phate'][:, 0], + 'PHATE2': features_anndata.obsm['X_phate'][:, 1], + 'infection_state': features_anndata.obs['annotations_infections_state'].fillna('unknown') +}) + +# Define color palette +color_palette = { + 'infected': 'orange', + 'uninfected': 'blue', + 'unknown': 'gray' +} + +# Create figure with two subplots +fig, axes = plt.subplots(1, 2, figsize=(14, 6)) + +# Plot PCA +sns.scatterplot( + data=plot_df, + x='PC1', + y='PC2', + hue='infection_state', + palette=color_palette, + ax=axes[0], + alpha=0.6, + s=20 +) +axes[0].set_title('PCA Embedding') +axes[0].set_xlabel('PC1') +axes[0].set_ylabel('PC2') + +# Plot PHATE +sns.scatterplot( + data=plot_df, + x='PHATE1', + y='PHATE2', + hue='infection_state', + palette=color_palette, + ax=axes[1], + alpha=0.6, + s=20 +) +axes[1].set_title('PHATE Embedding') +axes[1].set_xlabel('PHATE 1') +axes[1].set_ylabel('PHATE 2') + +plt.tight_layout() +plt.show() + # %% +# NOTE: We have chosen these tracks to be representative of the data. 
Feel free to open the dataset and select other tracks +fov_name_mock = "A/3/9" +track_id_mock = [19] +fov_name_inf = "B/4/9" +track_id_inf = [42] + +## Show the images over time def get_patch(data, cell_centroid, patch_size): """Extract patch centered on cell centroid across all channels. @@ -258,6 +329,26 @@ def get_patch(data, cell_centroid, patch_size): patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)] return patch +# Open the dataset +plate = open_ome_zarr(input_data_path) +uninfected_position = plate[fov_name_mock][0] +infected_position = plate[fov_name_inf][0] + +# Filter the centroids of these two tracks +filtered_centroid_mock = features_anndata[(features_anndata["fov_name"] == fov_name_mock) &(features_anndata['track_id']==track_id_mock)] +filtered_centroid_inf = features_anndata[(features_anndata["fov_name"] == fov_name_inf) &(features_anndata['track_id']==track_id_inf)] + +uinfected_stack= [] +for idx, row in filtered_centroid_mock.iterrows(): + uinfected_stack.append(get_patch(cyx,(row['y'],row['x']),patch_size)) +uinfected_stack = np.array(uinfected_stack) + +infected_stack = [] +for idx, row in filtered_centroid_mock.iterrows(): + infected_stack.append(get_patch(cyx,(row['y'],row['x']),patch_size)) +infected_stack = np.array(infected_stack) + +# Plot 10 timepoints # %% [markdown] """ From f65ad1798fcb5f796eba25a76514429ccbca65ea Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 11:04:50 -0800 Subject: [PATCH 03/14] ipywidgets --- examples/DynaCLR/vcp_tutorials/quickstart.py | 80 +++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index 696500378..de254f0b8 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -348,7 +348,85 @@ def get_patch(data, cell_centroid, patch_size): 
infected_stack.append(get_patch(cyx,(row['y'],row['x']),patch_size)) infected_stack = np.array(infected_stack) -# Plot 10 timepoints +# Interactive visualization for Google Colab +# This creates an interactive widget to scrub through timepoints +try: + from ipywidgets import interact, IntSlider + import numpy as np + + max_t = min(len(uinfected_stack), len(infected_stack)) + + def plot_timepoint(t): + """Plot both infected and uninfected cells at a specific timepoint""" + fig, axes = plt.subplots(2, 3, figsize=(15, 10)) + fig.suptitle(f'Timepoint: {t}', fontsize=16) + + # Plot uninfected cell + for channel_idx in range(3): + ax = axes[0, channel_idx] + img = uinfected_stack[t, channel_idx, :, :] + ax.imshow(img, cmap='gray') + ax.set_title(f'Uninfected - Ch {channel_idx}') + ax.axis('off') + + # Plot infected cell + for channel_idx in range(3): + ax = axes[1, channel_idx] + img = infected_stack[t, channel_idx, :, :] + ax.imshow(img, cmap='gray') + ax.set_title(f'Infected - Ch {channel_idx}') + ax.axis('off') + + plt.tight_layout() + plt.show() + + # Create interactive slider + interact(plot_timepoint, t=IntSlider(min=0, max=max_t-1, step=1, value=0, description='Timepoint:')) + +except ImportError: + # Fallback to static plot if ipywidgets not available + print("ipywidgets not available, showing static plots instead") + + # Plot 10 equally spaced timepoints + n_timepoints = 10 + max_t = min(len(uinfected_stack), len(infected_stack)) + timepoint_indices = np.linspace(0, max_t - 1, n_timepoints, dtype=int) + + # Create figure with 3 rows (channels) x 10 columns (timepoints) for uninfected + fig, axes = plt.subplots(3, n_timepoints, figsize=(20, 6)) + fig.suptitle('Uninfected Cell Over Time', fontsize=16, y=1.02) + + for channel_idx in range(3): + for col_idx, t_idx in enumerate(timepoint_indices): + ax = axes[channel_idx, col_idx] + img = uinfected_stack[t_idx, channel_idx, :, :] + ax.imshow(img, cmap='gray') + ax.axis('off') + if channel_idx == 0: + 
ax.set_title(f't={t_idx}', fontsize=10) + if col_idx == 0: + ax.set_ylabel(f'Channel {channel_idx}', fontsize=12) + + plt.tight_layout() + plt.show() + + # Create figure with 3 rows (channels) x 10 columns (timepoints) for infected + fig, axes = plt.subplots(3, n_timepoints, figsize=(20, 6)) + fig.suptitle('Infected Cell Over Time', fontsize=16, y=1.02) + + for channel_idx in range(3): + for col_idx, t_idx in enumerate(timepoint_indices): + ax = axes[channel_idx, col_idx] + img = infected_stack[t_idx, channel_idx, :, :] + ax.imshow(img, cmap='gray') + ax.axis('off') + if channel_idx == 0: + ax.set_title(f't={t_idx}', fontsize=10) + if col_idx == 0: + ax.set_ylabel(f'Channel {channel_idx}', fontsize=12) + + plt.tight_layout() + plt.show() # %% [markdown] """ From 7b961d5dc834a535467d524cb364840610c28acd Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 15:19:11 -0800 Subject: [PATCH 04/14] add infection plotting and the sample images over time --- examples/DynaCLR/vcp_tutorials/quickstart.py | 315 ++++++++++++------- 1 file changed, 209 insertions(+), 106 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index de254f0b8..43a2ab019 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -2,7 +2,7 @@ """ # Quick Start: DynaCLR (Cell Dynamics Contrastive Learning of Representations) -**Estimated time to complete:** 15 minutes +**Estimated time to complete:** 25 minutes """ # %% [markdown] @@ -43,7 +43,7 @@ The DynaCLR-DENV-VS+Ph model only requires label-free (quantitative phase) and fluorescence images for inference. 
-To run inference on your own data (Experimental): +To run inference on your own data (Experimental): - Convert the label-free images into the OME-Zarr data format using iohub or other [tools](https://ngff.openmicroscopy.org/tools/index.html#file-conversion), - Run [pre-processing](https://github.com/mehta-lab/VisCy/blob/main/docs/usage.md#preprocessing) @@ -108,42 +108,64 @@ # %% from pathlib import Path # noqa: E402 +import matplotlib.pyplot as plt # noqa: E402 +import pandas as pd # noqa: E402 +import seaborn as sns # noqa: E402 +from anndata import read_zarr # noqa: E402 from iohub import open_ome_zarr # noqa: E402 from torchview import draw_graph # noqa: E402 from viscy.data.triplet import TripletDataModule # noqa: E402 -from viscy.trainer import VisCyTrainer # noqa: E402 -from viscy.transforms import NormalizeSampled # noqa: E402 from viscy.representation.embedding_writer import EmbeddingWriter # noqa: E402 -from viscy.representation.engine import ContrastiveModule # noqa: E402 -from anndata import read_zarr # noqa: E402 -import matplotlib.pyplot as plt # noqa: E402 -import seaborn as sns # noqa: E402 -import pandas as pd # noqa: E402 +from viscy.representation.engine import ( + ContrastiveEncoder, + ContrastiveModule, +) # noqa: E402 +from viscy.trainer import VisCyTrainer # noqa: E402 +from viscy.transforms import ( # noqa: E402 + NormalizeSampled, + ScaleIntensityRangePercentilesd, +) # %% # NOTE: Nothing needs to be changed in this code block for the example to work. # If using your own data, please modify the paths below. 
# TODO: Set download paths, by default the working directory is used -root_dir = Path() +root_dir = Path("/hpc/websites/public.czbiohub.org/comp.micro/viscy") # TODO: modify the path to the input dataset -input_data_path = root_dir / "registered_test.zarr" +input_data_path = ( + root_dir + / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" + / "registered_test.zarr" +) # TODO: modify the path to the track dataset -tracks_path= root_dir/ "track_test.zarr" +tracks_path = ( + root_dir + / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" + / "track_test.zarr" +) # TODO: modify the path to the model checkpoint -model_ckpt_path = root_dir / "epoch=94-step=2375.ckpt" -#TODO" modify the path to load the extracted infected cell annotation -annotations_path = root_dir / "extracted_inf_state.csv" +model_ckpt_path = ( + root_dir / "DynaCLR_models/DynaCLR-DENV/VS_n_Ph" / "epoch=94-step=2375.ckpt" +) +# TODO" modify the path to load the extracted infected cell annotation +annotations_path = ( + root_dir + / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" + / "extracted_inf_state.csv" +) # TODO: modify the path to save the predictions -output_path = root_dir / "dynaclr_prediction.zarr" +output_path = ( + Path("/home/eduardo.hirata/mydata/tmp/dynaclr_demo") / "dynaclr_prediction.zarr" +) -#%% +# %% # Default parameters for the test dataset -z_range = (15, 45) +z_range = [15, 45] yx_patch_size = (160, 160) -channels_to_display = ["Phase3D", "RFP"] +channels_to_display = ["Phase3D", "RFP"] # label-free and viral sensor # %% # Configure the data module for loading example images in prediction mode. 
@@ -160,8 +182,24 @@ z_range=z_range, initial_yx_patch_size=yx_patch_size, final_yx_patch_size=yx_patch_size, - predict_cells=True, - batch_size=8, + # predict_cells=True, + batch_size=64, # TODO reduce this number if you see OOM errors when running the trainer + num_workers=1, + normalizations=[ + NormalizeSampled( + ["Phase3D"], + level="fov_statistics", + subtrahend="mean", + divisor="std", + ), + ScaleIntensityRangePercentilesd( + ["RFP"], + lower=50, + upper=99, + b_min=0.0, + b_max=1.0, + ), + ], ) datamodule.setup("predict") @@ -172,25 +210,26 @@ # ?contrastive.ContrastiveEncoder # %% -dynaclr_model = ContrastiveEncoder.load_from_checkpoint( - model_ckpt_path, # checkpoint path - model_config={ - backbone: 'convnext_tiny', - in_channels: len(channels_to_display), - in_stack_depth: z_range, - stem_kernel_size: (5,4,4), - stem_stride:(5,4,4), - embedding_dim: 768, - projection_dim: 32, - drop_path_rate: 0.0, - }, +dynaclr_model = ContrastiveModule.load_from_checkpoint( + model_ckpt_path, # checkpoint path + encoder=ContrastiveEncoder( + backbone="convnext_tiny", + in_channels=len(channels_to_display), + in_stack_depth=z_range[1] - z_range[0], + stem_kernel_size=(5, 4, 4), + stem_stride=(5, 4, 4), + embedding_dim=768, + projection_dim=32, + drop_path_rate=0.0, + ), + example_input_array_shape=(1, 2, 30, 256, 256), ) # %% # Visualize the model graph model_graph = draw_graph( dynaclr_model, - torch.ones((1,2,30,256,256), + dynaclr_model.example_input_array, graph_name="DynaCLR", roll=True, depth=3, @@ -210,7 +249,15 @@ # %% # Initialize the trainer # The prediction writer callback will save the predictions to an OME-Zarr store -trainer = VisCyTrainer(callbacks=[EmbeddingWriter(output_path, pca_kwargs={"n_components":8}, phate_kwargs={"knn":5, "decay":40,"n_jobs":-1})]) +trainer = VisCyTrainer( + callbacks=[ + EmbeddingWriter( + output_path, + pca_kwargs={"n_components": 8}, + phate_kwargs={"knn": 5, "decay": 40, "n_jobs": -1}, + ) + ] +) # Run prediction 
trainer.predict(model=dynaclr_model, datamodule=datamodule, return_predictions=False) @@ -225,34 +272,51 @@ # NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks features_anndata = read_zarr(output_path) annotation = pd.read_csv(annotations_path) -ANNOTATION_COLUMN = 'infection-state' +ANNOTATION_COLUMN = "infection_state" # Combine embeddings and annotations -annotation["fov_name"] = annotation["fov_name"].str.strip("/") -annotation["fov_name"] = annotation["fov_name"].str.strip("/") +# Reload annotation to ensure clean state (in case cell is re-run) +annotation = pd.read_csv(annotations_path) -annotation = annotation.set_index(["fov_name", "id"]) +# Strip whitespace from fov_name to match features +annotation["fov_name"] = annotation["fov_name"].str.strip() +# Merge on (fov_name, track_id, t) as these uniquely identify each cell observation +annotation_indexed = annotation.set_index(["fov_name", "track_id", "t"]) mi = pd.MultiIndex.from_arrays( - [features_anndata.obs["fov_name"], features_anndata.obs["id"]], names=["fov_name", "id"] + [ + features_anndata.obs["fov_name"], + features_anndata.obs["track_id"], + features_anndata.obs["t"], + ], + names=["fov_name", "track_id", "t"], ) -features_anndata.obs['annotations_infections_state'] = annotation.reindex(mi)[ANNOTATION_COLUMN] +features_anndata.obs["annotations_infections_state"] = annotation_indexed.reindex(mi)[ + ANNOTATION_COLUMN +].values # Plot the PCA and PHATE embeddings colored by infection state # Prepare data for plotting -plot_df = pd.DataFrame({ - 'PC1': features_anndata.obsm['X_pca'][:, 0], - 'PC2': features_anndata.obsm['X_pca'][:, 1], - 'PHATE1': features_anndata.obsm['X_phate'][:, 0], - 'PHATE2': features_anndata.obsm['X_phate'][:, 1], - 'infection_state': features_anndata.obs['annotations_infections_state'].fillna('unknown') -}) - -# Define color palette +# Map numeric labels to readable labels for legend 
+infection_state_labels = {0: "Unknown", 1: "Uninfected", 2: "Infected"} + +plot_df = pd.DataFrame( + { + "PC1": features_anndata.obsm["X_pca"][:, 0], + "PC2": features_anndata.obsm["X_pca"][:, 1], + "PHATE1": features_anndata.obsm["X_phate"][:, 0], + "PHATE2": features_anndata.obsm["X_phate"][:, 1], + "infection_state": features_anndata.obs["annotations_infections_state"] + .fillna(0) + .map(infection_state_labels), + } +) + +# Define color palette (colorblind-friendly: blue for uninfected, orange for infected) color_palette = { - 'infected': 'orange', - 'uninfected': 'blue', - 'unknown': 'gray' + "Unknown": "lightgray", # Unlabeled + "Uninfected": "cornflowerblue", # Uninfected + "Infected": "darkorange", # Infected } # Create figure with two subplots @@ -261,32 +325,32 @@ # Plot PCA sns.scatterplot( data=plot_df, - x='PC1', - y='PC2', - hue='infection_state', + x="PC1", + y="PC2", + hue="infection_state", palette=color_palette, ax=axes[0], alpha=0.6, - s=20 + s=20, ) -axes[0].set_title('PCA Embedding') -axes[0].set_xlabel('PC1') -axes[0].set_ylabel('PC2') +axes[0].set_title("PCA Embedding") +axes[0].set_xlabel("PC1") +axes[0].set_ylabel("PC2") # Plot PHATE sns.scatterplot( data=plot_df, - x='PHATE1', - y='PHATE2', - hue='infection_state', + x="PHATE1", + y="PHATE2", + hue="infection_state", palette=color_palette, ax=axes[1], alpha=0.6, - s=20 + s=20, ) -axes[1].set_title('PHATE Embedding') -axes[1].set_xlabel('PHATE 1') -axes[1].set_ylabel('PHATE 2') +axes[1].set_title("PHATE Embedding") +axes[1].set_xlabel("PHATE 1") +axes[1].set_ylabel("PHATE 2") plt.tight_layout() plt.show() @@ -299,6 +363,7 @@ fov_name_inf = "B/4/9" track_id_inf = [42] + ## Show the images over time def get_patch(data, cell_centroid, patch_size): """Extract patch centered on cell centroid across all channels. 
@@ -329,59 +394,96 @@ def get_patch(data, cell_centroid, patch_size): patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)] return patch + # Open the dataset plate = open_ome_zarr(input_data_path) -uninfected_position = plate[fov_name_mock][0] -infected_position = plate[fov_name_inf][0] +uninfected_position = plate[fov_name_mock] +infected_position = plate[fov_name_inf] -# Filter the centroids of these two tracks -filtered_centroid_mock = features_anndata[(features_anndata["fov_name"] == fov_name_mock) &(features_anndata['track_id']==track_id_mock)] -filtered_centroid_inf = features_anndata[(features_anndata["fov_name"] == fov_name_inf) &(features_anndata['track_id']==track_id_inf)] +# Get channel indices for the channels we want to display +channel_names = uninfected_position.channel_names +channels_to_display_idx = [channel_names.index(c) for c in channels_to_display] -uinfected_stack= [] +# Filter the centroids of these two tracks +filtered_centroid_mock = features_anndata.obs[ + (features_anndata.obs["fov_name"] == fov_name_mock) + & (features_anndata.obs["track_id"].isin(track_id_mock)) +].sort_values("t") +filtered_centroid_inf = features_anndata.obs[ + (features_anndata.obs["fov_name"] == fov_name_inf) + & (features_anndata.obs["track_id"].isin(track_id_inf)) +].sort_values("t") + +# Define patch size for visualization +patch_size = 160 + +# Extract patches for uninfected cells over time +import numpy as np + +uinfected_stack = [] for idx, row in filtered_centroid_mock.iterrows(): - uinfected_stack.append(get_patch(cyx,(row['y'],row['x']),patch_size)) + t = int(row["t"]) + # Load the image data for this timepoint (CZYX format), select only required channels + img_data = uninfected_position.data[ + t, channels_to_display_idx, z_range[0] : z_range[1] + ] + # Take max projection along Z axis to get CYX + cyx = img_data.max(axis=1) + uinfected_stack.append(get_patch(cyx, (row["y"], row["x"]), patch_size)) uinfected_stack = np.array(uinfected_stack) 
+# Extract patches for infected cells over time infected_stack = [] -for idx, row in filtered_centroid_mock.iterrows(): - infected_stack.append(get_patch(cyx,(row['y'],row['x']),patch_size)) +for idx, row in filtered_centroid_inf.iterrows(): + t = int(row["t"]) + # Load the image data for this timepoint (CZYX format), select only required channels + img_data = infected_position.data[ + t, channels_to_display_idx, z_range[0] : z_range[1] + ] + # Take max projection along Z axis to get CYX + cyx = img_data.max(axis=1) + infected_stack.append(get_patch(cyx, (row["y"], row["x"]), patch_size)) infected_stack = np.array(infected_stack) # Interactive visualization for Google Colab # This creates an interactive widget to scrub through timepoints try: - from ipywidgets import interact, IntSlider import numpy as np + from ipywidgets import IntSlider, interact max_t = min(len(uinfected_stack), len(infected_stack)) def plot_timepoint(t): """Plot both infected and uninfected cells at a specific timepoint""" - fig, axes = plt.subplots(2, 3, figsize=(15, 10)) - fig.suptitle(f'Timepoint: {t}', fontsize=16) + fig, axes = plt.subplots(2, 2, figsize=(10, 10)) + fig.suptitle(f"Timepoint: {t}", fontsize=16) # Plot uninfected cell - for channel_idx in range(3): + for channel_idx, channel_name in enumerate(channels_to_display): ax = axes[0, channel_idx] img = uinfected_stack[t, channel_idx, :, :] - ax.imshow(img, cmap='gray') - ax.set_title(f'Uninfected - Ch {channel_idx}') - ax.axis('off') + ax.imshow(img, cmap="gray") + ax.set_title(f"Uninfected - {channel_name}") + ax.axis("off") # Plot infected cell - for channel_idx in range(3): + channel_names = uninfected_position.channel_names + channels_to_display_idx = [channel_names.index(c) for c in channels_to_display] + for channel_idx, channel_name in enumerate(channels_to_display_idx): ax = axes[1, channel_idx] img = infected_stack[t, channel_idx, :, :] - ax.imshow(img, cmap='gray') - ax.set_title(f'Infected - Ch {channel_idx}') - 
ax.axis('off') + ax.imshow(img, cmap="gray") + ax.set_title(f"Infected - {channel_name}") + ax.axis("off") plt.tight_layout() plt.show() # Create interactive slider - interact(plot_timepoint, t=IntSlider(min=0, max=max_t-1, step=1, value=0, description='Timepoint:')) + interact( + plot_timepoint, + t=IntSlider(min=0, max=max_t - 1, step=1, value=0, description="Timepoint:"), + ) except ImportError: # Fallback to static plot if ipywidgets not available @@ -392,41 +494,42 @@ def plot_timepoint(t): max_t = min(len(uinfected_stack), len(infected_stack)) timepoint_indices = np.linspace(0, max_t - 1, n_timepoints, dtype=int) - # Create figure with 3 rows (channels) x 10 columns (timepoints) for uninfected - fig, axes = plt.subplots(3, n_timepoints, figsize=(20, 6)) - fig.suptitle('Uninfected Cell Over Time', fontsize=16, y=1.02) - - for channel_idx in range(3): + # Create figure with 2 rows (channels) x 10 columns (timepoints) for uninfected + fig, axes = plt.subplots(2, n_timepoints, figsize=(20, 4)) + fig.suptitle("Uninfected Cell Over Time", fontsize=16, y=1.02) + channel_names = uninfected_position.channel_names + channels_to_display_idx = [channel_names.index(c) for c in channels_to_display] + for channel_idx, channel_name in enumerate(channels_to_display): for col_idx, t_idx in enumerate(timepoint_indices): ax = axes[channel_idx, col_idx] img = uinfected_stack[t_idx, channel_idx, :, :] - ax.imshow(img, cmap='gray') - ax.axis('off') + ax.imshow(img, cmap="gray") + ax.axis("off") if channel_idx == 0: - ax.set_title(f't={t_idx}', fontsize=10) + ax.set_title(f"t={t_idx}", fontsize=10) if col_idx == 0: - ax.set_ylabel(f'Channel {channel_idx}', fontsize=12) + ax.set_ylabel(channel_name, fontsize=12) plt.tight_layout() plt.show() - # Create figure with 3 rows (channels) x 10 columns (timepoints) for infected - fig, axes = plt.subplots(3, n_timepoints, figsize=(20, 6)) - fig.suptitle('Infected Cell Over Time', fontsize=16, y=1.02) + # Create figure with 2 rows (channels) x 
10 columns (timepoints) for infected + fig, axes = plt.subplots(2, n_timepoints, figsize=(20, 4)) + fig.suptitle("Infected Cell Over Time", fontsize=16, y=1.02) - for channel_idx in range(3): + for channel_idx, channel_name in enumerate(channels_to_display): for col_idx, t_idx in enumerate(timepoint_indices): ax = axes[channel_idx, col_idx] img = infected_stack[t_idx, channel_idx, :, :] - ax.imshow(img, cmap='gray') - ax.axis('off') + ax.imshow(img, cmap="gray") + ax.axis("off") if channel_idx == 0: - ax.set_title(f't={t_idx}', fontsize=10) + ax.set_title(f"t={t_idx}", fontsize=10) if col_idx == 0: - ax.set_ylabel(f'Channel {channel_idx}', fontsize=12) + ax.set_ylabel(channel_name, fontsize=12) plt.tight_layout() - plt.show() + plt.show() # %% [markdown] """ From 9d1f8392eaa87b9a5df8c789624286d57ce1ea73 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 15:33:00 -0800 Subject: [PATCH 05/14] remove max projecting the phase images --- examples/DynaCLR/vcp_tutorials/quickstart.py | 26 +++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index 43a2ab019..fe518ac99 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -427,8 +427,17 @@ def get_patch(data, cell_centroid, patch_size): img_data = uninfected_position.data[ t, channels_to_display_idx, z_range[0] : z_range[1] ] - # Take max projection along Z axis to get CYX - cyx = img_data.max(axis=1) + # For Phase3D take middle slice, for fluorescence take max projection + cyx = [] + for ch_idx, ch_name in enumerate(channels_to_display): + if ch_name == "Phase3D": + # Take middle Z slice for phase + mid_z = img_data.shape[1] // 2 + cyx.append(img_data[ch_idx, mid_z, :, :]) + else: + # Max projection for fluorescence + cyx.append(img_data[ch_idx].max(axis=0)) + cyx = np.array(cyx) uinfected_stack.append(get_patch(cyx, 
(row["y"], row["x"]), patch_size)) uinfected_stack = np.array(uinfected_stack) @@ -440,8 +449,17 @@ def get_patch(data, cell_centroid, patch_size): img_data = infected_position.data[ t, channels_to_display_idx, z_range[0] : z_range[1] ] - # Take max projection along Z axis to get CYX - cyx = img_data.max(axis=1) + # For Phase3D take middle slice, for fluorescence take max projection + cyx = [] + for ch_idx, ch_name in enumerate(channels_to_display): + if ch_name == "Phase3D": + # Take middle Z slice for phase + mid_z = img_data.shape[1] // 2 + cyx.append(img_data[ch_idx, mid_z, :, :]) + else: + # Max projection for fluorescence + cyx.append(img_data[ch_idx].max(axis=0)) + cyx = np.array(cyx) infected_stack.append(get_patch(cyx, (row["y"], row["x"]), patch_size)) infected_stack = np.array(infected_stack) From dc9f42a6a9cc8e44e1ee442728c4ca3f37f48131 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Tue, 25 Nov 2025 15:41:37 -0800 Subject: [PATCH 06/14] remove hardcoded paths and add ipynb --- examples/DynaCLR/vcp_tutorials/README.md | 2 +- .../DynaCLR/vcp_tutorials/quickstart.ipynb | 732 ++++++++++++++++++ examples/DynaCLR/vcp_tutorials/quickstart.py | 42 +- 3 files changed, 749 insertions(+), 27 deletions(-) create mode 100644 examples/DynaCLR/vcp_tutorials/quickstart.ipynb diff --git a/examples/DynaCLR/vcp_tutorials/README.md b/examples/DynaCLR/vcp_tutorials/README.md index d5c46ad33..c4e8fb763 100644 --- a/examples/DynaCLR/vcp_tutorials/README.md +++ b/examples/DynaCLR/vcp_tutorials/README.md @@ -13,5 +13,5 @@ which are converted to Jupyter notebooks with: ```sh # TODO: change the file name at the end to be the script to convert -jupytext --to ipynb --update-metadata '{"jupytext":{"cell_metadata_filter":"all"}}' --update quick_start.py +jupytext --to ipynb --update-metadata '{"jupytext":{"cell_metadata_filter":"all"}}' --update quickstart.py ``` diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb 
b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb new file mode 100644 index 000000000..ca3f81b26 --- /dev/null +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -0,0 +1,732 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "50f182b0", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Quick Start: DynaCLR (Cell Dynamics Contrastive Learning of Representations)\n", + "\n", + "**Estimated time to complete:** 25 minutes" + ] + }, + { + "cell_type": "markdown", + "id": "cb89ad18", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Learning Goals\n", + "\n", + "* Download the DynaCLR model and run it on an example dataset\n", + "* Visualize the learned embeddings" + ] + }, + { + "cell_type": "markdown", + "id": "adc0b19b", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Prerequisites\n", + "Python>=3.11" + ] + }, + { + "cell_type": "markdown", + "id": "aefd594f", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Introduction\n", + "\n", + "## Model\n", + "The DynaCLR model architecture consists of three main components designed to map 3D multi-channel patches of single cells to a temporally regularized embedding space.\n", + "\n", + "## Example Dataset\n", + "\n", + "The A549 example dataset used in this quick-start guide contains\n", + "quantitative phase and paired fluorescence images of viral sensor reporter.\n", + "It is stored in OME-Zarr format and can be downloaded from\n", + "[here](https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/).\n", + "\n", + "It has pre-computed statistics for normalization, generated using the `viscy preprocess` CLI.\n", + "\n", + "Refer to our [preprint](https://arxiv.org/abs/2410.11281) for more details\n", + "about how the dataset and model were generated.\n", + "\n", + "## User Data\n", + "\n", + "The DynaCLR-DENV-VS+Ph model only requires label-free (quantitative phase) and 
fluorescence images for inference.\n", + "\n", + "To run inference on your own data (Experimental):\n", + "- Convert the label-free images into the OME-Zarr data format using iohub or other\n", + "[tools](https://ngff.openmicroscopy.org/tools/index.html#file-conversion),\n", + "- Run [pre-processing](https://github.com/mehta-lab/VisCy/blob/main/docs/usage.md#preprocessing)\n", + "with the `viscy preprocess` CLI\n", + "- Generate pseudo-tracks or tracking data from [Ultrack](https://github.com/royerlab/ultrack)" + ] + }, + { + "cell_type": "markdown", + "id": "c61d4e73", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Setup\n", + "\n", + "The commands below will install the required packages and download the example dataset and model checkpoint.\n", + "It may take a few minutes to download all the files.\n", + "\n", + "## Setup Google Colab\n", + "\n", + "To run this quick-start guide using Google Colab,\n", + "choose the 'T4' GPU runtime from the \"Connect\" dropdown menu\n", + "in the upper-right corner of this notebook for faster execution.\n", + "Using a GPU significantly speeds up running model inference, but CPU compute can also be used.\n", + "\n", + "## Setup Local Environment\n", + "\n", + "The commands below assume a Unix-like shell with `wget` installed.\n", + "On Windows, the files can be downloaded manually from the URLs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "506cea61", + "metadata": {}, + "outputs": [], + "source": [ + "# Install VisCy with the optional dependencies for this example\n", + "# See the [repository](https://github.com/mehta-lab/VisCy) for more details\n", + "!pip install \"viscy[metrics,visual]==0.4.0a3\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "338b5cde", + "metadata": {}, + "outputs": [], + "source": [ + "# restart kernel if running in Google Colab\n", + "if \"get_ipython\" in globals():\n", + " session = get_ipython() # noqa: F821\n", + " if \"google.colab\" in str(session):\n", + " print(\"Shutting down colab session.\")\n", + " session.kernel.do_shutdown(restart=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a00861b4", + "metadata": {}, + "outputs": [], + "source": [ + "# Validate installation\n", + "!viscy --help" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4044b3a7", + "metadata": {}, + "outputs": [], + "source": [ + "# Download the example tracks data\n", + "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/\"\n", + "# Download the example registered timelapse data\n", + "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/\"\n", + "# Download the model checkpoint\n", + "!wget -m -np -nH --cut-dirs=5 \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt\"" + ] + }, + { + "cell_type": "markdown", + "id": "50c3215e", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Run Model Inference\n", + "\n", + "The following code will run inference on a single field of view (FOV) of the example dataset.\n", + "This can also be 
achieved by using the VisCy CLI." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b630d93b", + "metadata": { + "lines_to_next_cell": 0 + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3f5bbb7", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path # noqa: E402\n", + "\n", + "import matplotlib.pyplot as plt # noqa: E402\n", + "import pandas as pd # noqa: E402\n", + "import seaborn as sns # noqa: E402\n", + "from anndata import read_zarr # noqa: E402\n", + "from iohub import open_ome_zarr # noqa: E402\n", + "from torchview import draw_graph # noqa: E402\n", + "\n", + "from viscy.data.triplet import TripletDataModule # noqa: E402\n", + "from viscy.representation.embedding_writer import EmbeddingWriter # noqa: E402\n", + "from viscy.representation.engine import (\n", + " ContrastiveEncoder,\n", + " ContrastiveModule,\n", + ") # noqa: E402\n", + "from viscy.trainer import VisCyTrainer # noqa: E402\n", + "from viscy.transforms import ( # noqa: E402\n", + " NormalizeSampled,\n", + " ScaleIntensityRangePercentilesd,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a75bdc6b", + "metadata": {}, + "outputs": [], + "source": [ + "# NOTE: Nothing needs to be changed in this code block for the example to work.\n", + "# If using your own data, please modify the paths below.\n", + "\n", + "# TODO: Set download paths, by default the working directory is used\n", + "root_dir = Path(\"\")\n", + "# TODO: modify the path to the input dataset\n", + "input_data_path = root_dir / \"registered_test.zarr\"\n", + "# TODO: modify the path to the track dataset\n", + "tracks_path = root_dir / \"track_test.zarr\"\n", + "# TODO: modify the path to the model checkpoint\n", + "model_ckpt_path = root_dir / \"epoch=94-step=2375.ckpt\"\n", + "# TODO\" modify the path to load the extracted infected cell annotation\n", + "annotations_path = root_dir / 
\"extracted_inf_state.csv\"\n", + "\n", + "# TODO: modify the path to save the predictions\n", + "output_path = root_dir / \"dynaclr_prediction.zarr\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f26a53fa", + "metadata": {}, + "outputs": [], + "source": [ + "# Default parameters for the test dataset\n", + "z_range = [15, 45]\n", + "yx_patch_size = (160, 160)\n", + "channels_to_display = [\"Phase3D\", \"RFP\"] # label-free and viral sensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55d169ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Configure the data module for loading example images in prediction mode.\n", + "# See API documentation for how to use it with a different dataset.\n", + "# For example, View the documentation for the TripletDataModule class by running:\n", + "?TripletDataModule" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "072a46b3", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup the data module to use the example dataset\n", + "datamodule = TripletDataModule(\n", + " data_path=input_data_path,\n", + " tracks_path=tracks_path,\n", + " source_channel=channels_to_display,\n", + " z_range=z_range,\n", + " initial_yx_patch_size=yx_patch_size,\n", + " final_yx_patch_size=yx_patch_size,\n", + " # predict_cells=True,\n", + " batch_size=64, # TODO reduce this number if you see OOM errors when running the trainer\n", + " num_workers=1,\n", + " normalizations=[\n", + " NormalizeSampled(\n", + " [\"Phase3D\"],\n", + " level=\"fov_statistics\",\n", + " subtrahend=\"mean\",\n", + " divisor=\"std\",\n", + " ),\n", + " ScaleIntensityRangePercentilesd(\n", + " [\"RFP\"],\n", + " lower=50,\n", + " upper=99,\n", + " b_min=0.0,\n", + " b_max=1.0,\n", + " ),\n", + " ],\n", + ")\n", + "datamodule.setup(\"predict\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2bb0416", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the 
DynaCLR checkpoint from the downloaded checkpoint\n", + "# See this module for options to configure the model:\n", + "\n", + "?ContrastiveModule\n", + "?ContrastiveEncoder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af5ac380", + "metadata": {}, + "outputs": [], + "source": [ + "dynaclr_model = ContrastiveModule.load_from_checkpoint(\n", + " model_ckpt_path, # checkpoint path\n", + " encoder=ContrastiveEncoder(\n", + " backbone=\"convnext_tiny\",\n", + " in_channels=len(channels_to_display),\n", + " in_stack_depth=z_range[1] - z_range[0],\n", + " stem_kernel_size=(5, 4, 4),\n", + " stem_stride=(5, 4, 4),\n", + " embedding_dim=768,\n", + " projection_dim=32,\n", + " drop_path_rate=0.0,\n", + " ),\n", + " example_input_array_shape=(1, 2, 30, 256, 256),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aee38cf6", + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize the model graph\n", + "model_graph = draw_graph(\n", + " dynaclr_model,\n", + " dynaclr_model.example_input_array,\n", + " graph_name=\"DynaCLR\",\n", + " roll=True,\n", + " depth=3,\n", + " expand_nested=True,\n", + ")\n", + "\n", + "model_graph.visual_graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b3055fc", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup the trainer for prediction\n", + "# The trainer can be further configured to better utilize the available hardware,\n", + "# For example using GPUs and half precision.\n", + "# Callbacks can also be used to customize logging and prediction writing.\n", + "# See the API documentation for more details:\n", + "?VisCyTrainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "119b2c28", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the trainer\n", + "# The prediction writer callback will save the predictions to an OME-Zarr store\n", + "trainer = VisCyTrainer(\n", + " callbacks=[\n", + " 
EmbeddingWriter(\n", + " output_path,\n", + " pca_kwargs={\"n_components\": 8},\n", + " phate_kwargs={\"knn\": 5, \"decay\": 40, \"n_jobs\": -1},\n", + " )\n", + " ]\n", + ")\n", + "\n", + "# Run prediction\n", + "trainer.predict(model=dynaclr_model, datamodule=datamodule, return_predictions=False)" + ] + }, + { + "cell_type": "markdown", + "id": "d214f42a", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 0 + }, + "source": [ + "# Model Outputs\n", + "\n", + "The model outputs are also stored in an ANNData. The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "813b4555", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks\n", + "features_anndata = read_zarr(output_path)\n", + "annotation = pd.read_csv(annotations_path)\n", + "ANNOTATION_COLUMN = \"infection_state\"\n", + "\n", + "# Combine embeddings and annotations\n", + "# Reload annotation to ensure clean state (in case cell is re-run)\n", + "annotation = pd.read_csv(annotations_path)\n", + "\n", + "# Strip whitespace from fov_name to match features\n", + "annotation[\"fov_name\"] = annotation[\"fov_name\"].str.strip()\n", + "\n", + "# Merge on (fov_name, track_id, t) as these uniquely identify each cell observation\n", + "annotation_indexed = annotation.set_index([\"fov_name\", \"track_id\", \"t\"])\n", + "mi = pd.MultiIndex.from_arrays(\n", + " [\n", + " features_anndata.obs[\"fov_name\"],\n", + " features_anndata.obs[\"track_id\"],\n", + " features_anndata.obs[\"t\"],\n", + " ],\n", + " names=[\"fov_name\", \"track_id\", \"t\"],\n", + ")\n", + "features_anndata.obs[\"annotations_infections_state\"] = annotation_indexed.reindex(mi)[\n", + " ANNOTATION_COLUMN\n", + "].values\n", + "\n", + "# Plot the PCA and PHATE embeddings 
colored by infection state\n", + "# Prepare data for plotting\n", + "# Map numeric labels to readable labels for legend\n", + "infection_state_labels = {0: \"Unknown\", 1: \"Uninfected\", 2: \"Infected\"}\n", + "\n", + "plot_df = pd.DataFrame(\n", + " {\n", + " \"PC1\": features_anndata.obsm[\"X_pca\"][:, 0],\n", + " \"PC2\": features_anndata.obsm[\"X_pca\"][:, 1],\n", + " \"PHATE1\": features_anndata.obsm[\"X_phate\"][:, 0],\n", + " \"PHATE2\": features_anndata.obsm[\"X_phate\"][:, 1],\n", + " \"infection_state\": features_anndata.obs[\"annotations_infections_state\"]\n", + " .fillna(0)\n", + " .map(infection_state_labels),\n", + " }\n", + ")\n", + "\n", + "# Define color palette (colorblind-friendly: blue for uninfected, orange for infected)\n", + "color_palette = {\n", + " \"Unknown\": \"lightgray\", # Unlabeled\n", + " \"Uninfected\": \"cornflowerblue\", # Uninfected\n", + " \"Infected\": \"darkorange\", # Infected\n", + "}\n", + "\n", + "# Create figure with two subplots\n", + "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n", + "\n", + "# Plot PCA\n", + "sns.scatterplot(\n", + " data=plot_df,\n", + " x=\"PC1\",\n", + " y=\"PC2\",\n", + " hue=\"infection_state\",\n", + " palette=color_palette,\n", + " ax=axes[0],\n", + " alpha=0.6,\n", + " s=20,\n", + ")\n", + "axes[0].set_title(\"PCA Embedding\")\n", + "axes[0].set_xlabel(\"PC1\")\n", + "axes[0].set_ylabel(\"PC2\")\n", + "\n", + "# Plot PHATE\n", + "sns.scatterplot(\n", + " data=plot_df,\n", + " x=\"PHATE1\",\n", + " y=\"PHATE2\",\n", + " hue=\"infection_state\",\n", + " palette=color_palette,\n", + " ax=axes[1],\n", + " alpha=0.6,\n", + " s=20,\n", + ")\n", + "axes[1].set_title(\"PHATE Embedding\")\n", + "axes[1].set_xlabel(\"PHATE 1\")\n", + "axes[1].set_ylabel(\"PHATE 2\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b93c5b32", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Visualize the images over time. 
This shows the phase images and fluorescence images of the uninfected and infected cells over time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "893fa329", + "metadata": {}, + "outputs": [], + "source": [ + "# NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks\n", + "fov_name_mock = \"A/3/9\"\n", + "track_id_mock = [19]\n", + "fov_name_inf = \"B/4/9\"\n", + "track_id_inf = [42]\n", + "\n", + "\n", + "## Show the images over time\n", + "def get_patch(data, cell_centroid, patch_size):\n", + " \"\"\"Extract patch centered on cell centroid across all channels.\n", + "\n", + " Parameters\n", + " ----------\n", + " data : ndarray\n", + " Image data with shape (C, Y, X) or (Y, X)\n", + " cell_centroid : tuple\n", + " (y, x) coordinates of cell centroid\n", + " patch_size : int\n", + " Size of the square patch to extract\n", + "\n", + " Returns\n", + " -------\n", + " ndarray\n", + " Extracted patch with shape (C, patch_size, patch_size) or (patch_size, patch_size)\n", + " \"\"\"\n", + " y_centroid, x_centroid = cell_centroid\n", + " x_start = max(0, x_centroid - patch_size // 2)\n", + " x_end = min(data.shape[-1], x_centroid + patch_size // 2)\n", + " y_start = max(0, y_centroid - patch_size // 2)\n", + " y_end = min(data.shape[-2], y_centroid + patch_size // 2)\n", + "\n", + " if data.ndim == 3: # CYX format\n", + " patch = data[:, int(y_start) : int(y_end), int(x_start) : int(x_end)]\n", + " else: # YX format\n", + " patch = data[int(y_start) : int(y_end), int(x_start) : int(x_end)]\n", + " return patch\n", + "\n", + "\n", + "# Open the dataset\n", + "plate = open_ome_zarr(input_data_path)\n", + "uninfected_position = plate[fov_name_mock]\n", + "infected_position = plate[fov_name_inf]\n", + "\n", + "# Get channel indices for the channels we want to display\n", + "channel_names = uninfected_position.channel_names\n", + "channels_to_display_idx = [channel_names.index(c) for 
c in channels_to_display]\n", + "\n", + "# Filter the centroids of these two tracks\n", + "filtered_centroid_mock = features_anndata.obs[\n", + " (features_anndata.obs[\"fov_name\"] == fov_name_mock)\n", + " & (features_anndata.obs[\"track_id\"].isin(track_id_mock))\n", + "].sort_values(\"t\")\n", + "filtered_centroid_inf = features_anndata.obs[\n", + " (features_anndata.obs[\"fov_name\"] == fov_name_inf)\n", + " & (features_anndata.obs[\"track_id\"].isin(track_id_inf))\n", + "].sort_values(\"t\")\n", + "\n", + "# Define patch size for visualization\n", + "patch_size = 160\n", + "\n", + "# Extract patches for uninfected cells over time\n", + "import numpy as np\n", + "\n", + "uinfected_stack = []\n", + "for idx, row in filtered_centroid_mock.iterrows():\n", + " t = int(row[\"t\"])\n", + " # Load the image data for this timepoint (CZYX format), select only required channels\n", + " img_data = uninfected_position.data[\n", + " t, channels_to_display_idx, z_range[0] : z_range[1]\n", + " ]\n", + " # For Phase3D take middle slice, for fluorescence take max projection\n", + " cyx = []\n", + " for ch_idx, ch_name in enumerate(channels_to_display):\n", + " if ch_name == \"Phase3D\":\n", + " # Take middle Z slice for phase\n", + " mid_z = img_data.shape[1] // 2\n", + " cyx.append(img_data[ch_idx, mid_z, :, :])\n", + " else:\n", + " # Max projection for fluorescence\n", + " cyx.append(img_data[ch_idx].max(axis=0))\n", + " cyx = np.array(cyx)\n", + " uinfected_stack.append(get_patch(cyx, (row[\"y\"], row[\"x\"]), patch_size))\n", + "uinfected_stack = np.array(uinfected_stack)\n", + "\n", + "# Extract patches for infected cells over time\n", + "infected_stack = []\n", + "for idx, row in filtered_centroid_inf.iterrows():\n", + " t = int(row[\"t\"])\n", + " # Load the image data for this timepoint (CZYX format), select only required channels\n", + " img_data = infected_position.data[\n", + " t, channels_to_display_idx, z_range[0] : z_range[1]\n", + " ]\n", + " # For Phase3D take 
middle slice, for fluorescence take max projection\n", + " cyx = []\n", + " for ch_idx, ch_name in enumerate(channels_to_display):\n", + " if ch_name == \"Phase3D\":\n", + " # Take middle Z slice for phase\n", + " mid_z = img_data.shape[1] // 2\n", + " cyx.append(img_data[ch_idx, mid_z, :, :])\n", + " else:\n", + " # Max projection for fluorescence\n", + " cyx.append(img_data[ch_idx].max(axis=0))\n", + " cyx = np.array(cyx)\n", + " infected_stack.append(get_patch(cyx, (row[\"y\"], row[\"x\"]), patch_size))\n", + "infected_stack = np.array(infected_stack)\n", + "\n", + "# Interactive visualization for Google Colab\n", + "# This creates an interactive widget to scrub through timepoints\n", + "try:\n", + " import numpy as np\n", + " from ipywidgets import IntSlider, interact\n", + "\n", + " max_t = min(len(uinfected_stack), len(infected_stack))\n", + "\n", + " def plot_timepoint(t):\n", + " \"\"\"Plot both infected and uninfected cells at a specific timepoint\"\"\"\n", + " fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n", + " fig.suptitle(f\"Timepoint: {t}\", fontsize=16)\n", + "\n", + " # Plot uninfected cell\n", + " for channel_idx, channel_name in enumerate(channels_to_display):\n", + " ax = axes[0, channel_idx]\n", + " img = uinfected_stack[t, channel_idx, :, :]\n", + " ax.imshow(img, cmap=\"gray\")\n", + " ax.set_title(f\"Uninfected - {channel_name}\")\n", + " ax.axis(\"off\")\n", + "\n", + " # Plot infected cell\n", + " channel_names = uninfected_position.channel_names\n", + " channels_to_display_idx = [channel_names.index(c) for c in channels_to_display]\n", + " for channel_idx, channel_name in enumerate(channels_to_display_idx):\n", + " ax = axes[1, channel_idx]\n", + " img = infected_stack[t, channel_idx, :, :]\n", + " ax.imshow(img, cmap=\"gray\")\n", + " ax.set_title(f\"Infected - {channel_name}\")\n", + " ax.axis(\"off\")\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + " # Create interactive slider\n", + " interact(\n", + " 
plot_timepoint,\n", + " t=IntSlider(min=0, max=max_t - 1, step=1, value=0, description=\"Timepoint:\"),\n", + " )\n", + "\n", + "except ImportError:\n", + " # Fallback to static plot if ipywidgets not available\n", + " print(\"ipywidgets not available, showing static plots instead\")\n", + "\n", + " # Plot 10 equally spaced timepoints\n", + " n_timepoints = 10\n", + " max_t = min(len(uinfected_stack), len(infected_stack))\n", + " timepoint_indices = np.linspace(0, max_t - 1, n_timepoints, dtype=int)\n", + "\n", + " # Create figure with 2 rows (channels) x 10 columns (timepoints) for uninfected\n", + " fig, axes = plt.subplots(2, n_timepoints, figsize=(20, 4))\n", + " fig.suptitle(\"Uninfected Cell Over Time\", fontsize=16, y=1.02)\n", + " channel_names = uninfected_position.channel_names\n", + " channels_to_display_idx = [channel_names.index(c) for c in channels_to_display]\n", + " for channel_idx, channel_name in enumerate(channels_to_display):\n", + " for col_idx, t_idx in enumerate(timepoint_indices):\n", + " ax = axes[channel_idx, col_idx]\n", + " img = uinfected_stack[t_idx, channel_idx, :, :]\n", + " ax.imshow(img, cmap=\"gray\")\n", + " ax.axis(\"off\")\n", + " if channel_idx == 0:\n", + " ax.set_title(f\"t={t_idx}\", fontsize=10)\n", + " if col_idx == 0:\n", + " ax.set_ylabel(channel_name, fontsize=12)\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + " # Create figure with 2 rows (channels) x 10 columns (timepoints) for infected\n", + " fig, axes = plt.subplots(2, n_timepoints, figsize=(20, 4))\n", + " fig.suptitle(\"Infected Cell Over Time\", fontsize=16, y=1.02)\n", + "\n", + " for channel_idx, channel_name in enumerate(channels_to_display):\n", + " for col_idx, t_idx in enumerate(timepoint_indices):\n", + " ax = axes[channel_idx, col_idx]\n", + " img = infected_stack[t_idx, channel_idx, :, :]\n", + " ax.imshow(img, cmap=\"gray\")\n", + " ax.axis(\"off\")\n", + " if channel_idx == 0:\n", + " ax.set_title(f\"t={t_idx}\", fontsize=10)\n", 
+ " if col_idx == 0:\n", + " ax.set_ylabel(channel_name, fontsize=12)\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "b6056a26", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Responsible Use\n", + "\n", + "We are committed to advancing the responsible development and use of artificial intelligence.\n", + "Please follow our [Acceptable Use Policy](https://virtualcellmodels.cziscience.com/acceptable-use-policy) when engaging with our services.\n", + "\n", + "Should you have any security or privacy issues or questions related to the services,\n", + "please reach out to our team at [security@chanzuckerberg.com](mailto:security@chanzuckerberg.com) or [privacy@chanzuckerberg.com](mailto:privacy@chanzuckerberg.com) respectively." + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all", + "main_language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index fe518ac99..7af85120c 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -132,34 +132,18 @@ # If using your own data, please modify the paths below. 
# TODO: Set download paths, by default the working directory is used -root_dir = Path("/hpc/websites/public.czbiohub.org/comp.micro/viscy") +root_dir = Path("") # TODO: modify the path to the input dataset -input_data_path = ( - root_dir - / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" - / "registered_test.zarr" -) +input_data_path = root_dir / "registered_test.zarr" # TODO: modify the path to the track dataset -tracks_path = ( - root_dir - / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" - / "track_test.zarr" -) +tracks_path = root_dir / "track_test.zarr" # TODO: modify the path to the model checkpoint -model_ckpt_path = ( - root_dir / "DynaCLR_models/DynaCLR-DENV/VS_n_Ph" / "epoch=94-step=2375.ckpt" -) +model_ckpt_path = root_dir / "epoch=94-step=2375.ckpt" # TODO" modify the path to load the extracted infected cell annotation -annotations_path = ( - root_dir - / "DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse" - / "extracted_inf_state.csv" -) +annotations_path = root_dir / "extracted_inf_state.csv" # TODO: modify the path to save the predictions -output_path = ( - Path("/home/eduardo.hirata/mydata/tmp/dynaclr_demo") / "dynaclr_prediction.zarr" -) +output_path = root_dir / "dynaclr_prediction.zarr" # %% # Default parameters for the test dataset @@ -170,8 +154,8 @@ # %% # Configure the data module for loading example images in prediction mode. # See API documentation for how to use it with a different dataset. 
-# For example, View the documentation for the HCSDataModule class by running: -# ?HCSDataModule +# For example, View the documentation for the TripletDataModule class by running: +# ?TripletDataModule # %% # Setup the data module to use the example dataset @@ -207,7 +191,8 @@ # Load the DynaCLR checkpoint from the downloaded checkpoint # See this module for options to configure the model: -# ?contrastive.ContrastiveEncoder +# ?ContrastiveModule +# ?ContrastiveEncoder # %% dynaclr_model = ContrastiveModule.load_from_checkpoint( @@ -268,7 +253,7 @@ The model outputs are also stored in an ANNData. The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA) """ - +# %% # NOTE: We have chosen these tracks to be representative of the data. Feel free to open the dataset and select other tracks features_anndata = read_zarr(output_path) annotation = pd.read_csv(annotations_path) @@ -356,6 +341,11 @@ plt.show() +# %% [markdown] +""" +# Visualize the images over time. This shows the phase images and fluorescence images of the uninfected and infected cells over time. +""" + # %% # NOTE: We have chosen these tracks to be representative of the data. 
Feel free to open the dataset and select other tracks fov_name_mock = "A/3/9" From 9fae8e255b90756a7a0df5929a2a7ab038da0e62 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Wed, 26 Nov 2025 19:41:44 -0800 Subject: [PATCH 07/14] crop to 0-30 z slices --- examples/DynaCLR/vcp_tutorials/quickstart.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index 7af85120c..c71285ffa 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -134,7 +134,7 @@ # TODO: Set download paths, by default the working directory is used root_dir = Path("") # TODO: modify the path to the input dataset -input_data_path = root_dir / "registered_test.zarr" +input_data_path = root_dir / "registered_test_demo_cropped.zarr" # TODO: modify the path to the track dataset tracks_path = root_dir / "track_test.zarr" # TODO: modify the path to the model checkpoint @@ -147,7 +147,7 @@ # %% # Default parameters for the test dataset -z_range = [15, 45] +z_range = [0, 30] yx_patch_size = (160, 160) channels_to_display = ["Phase3D", "RFP"] # label-free and viral sensor @@ -343,7 +343,8 @@ # %% [markdown] """ -# Visualize the images over time. This shows the phase images and fluorescence images of the uninfected and infected cells over time. +## Visualize Images Over Time +Below we show phase and fluorescence images of the uninfected and infected cells over time. 
""" # %% From 1310b419f9f5ab5949732316cea4ec9160773ef6 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Wed, 26 Nov 2025 19:42:58 -0800 Subject: [PATCH 08/14] update quixkstart --- .../DynaCLR/vcp_tutorials/quickstart.ipynb | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb index ca3f81b26..71e747fa4 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "50f182b0", + "id": "cb32a304", "metadata": { "cell_marker": "\"\"\"" }, @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "cb89ad18", + "id": "594146e3", "metadata": { "cell_marker": "\"\"\"" }, @@ -27,7 +27,7 @@ }, { "cell_type": "markdown", - "id": "adc0b19b", + "id": "9c0321c2", "metadata": { "cell_marker": "\"\"\"" }, @@ -38,7 +38,7 @@ }, { "cell_type": "markdown", - "id": "aefd594f", + "id": "30f4e5b8", "metadata": { "cell_marker": "\"\"\"" }, @@ -74,7 +74,7 @@ }, { "cell_type": "markdown", - "id": "c61d4e73", + "id": "cb8339ef", "metadata": { "cell_marker": "\"\"\"" }, @@ -100,7 +100,7 @@ { "cell_type": "code", "execution_count": null, - "id": "506cea61", + "id": "e676e581", "metadata": {}, "outputs": [], "source": [ @@ -112,7 +112,7 @@ { "cell_type": "code", "execution_count": null, - "id": "338b5cde", + "id": "650827c5", "metadata": {}, "outputs": [], "source": [ @@ -127,7 +127,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a00861b4", + "id": "df5a8ee6", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4044b3a7", + "id": "a6126ba4", "metadata": {}, "outputs": [], "source": [ @@ -152,7 +152,7 @@ }, { "cell_type": "markdown", - "id": "50c3215e", + "id": "dcbb752d", "metadata": { "cell_marker": "\"\"\"" }, @@ -166,7 +166,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "b630d93b", + "id": "af399514", "metadata": { "lines_to_next_cell": 0 }, @@ -176,7 +176,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a3f5bbb7", + "id": "f18f90ce", "metadata": {}, "outputs": [], "source": [ @@ -205,7 +205,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a75bdc6b", + "id": "9fc31403", "metadata": {}, "outputs": [], "source": [ @@ -215,7 +215,7 @@ "# TODO: Set download paths, by default the working directory is used\n", "root_dir = Path(\"\")\n", "# TODO: modify the path to the input dataset\n", - "input_data_path = root_dir / \"registered_test.zarr\"\n", + "input_data_path = root_dir / \"registered_test_demo_cropped.zarr\"\n", "# TODO: modify the path to the track dataset\n", "tracks_path = root_dir / \"track_test.zarr\"\n", "# TODO: modify the path to the model checkpoint\n", @@ -230,12 +230,12 @@ { "cell_type": "code", "execution_count": null, - "id": "f26a53fa", + "id": "269b852e", "metadata": {}, "outputs": [], "source": [ "# Default parameters for the test dataset\n", - "z_range = [15, 45]\n", + "z_range = [0, 30]\n", "yx_patch_size = (160, 160)\n", "channels_to_display = [\"Phase3D\", \"RFP\"] # label-free and viral sensor" ] @@ -243,7 +243,7 @@ { "cell_type": "code", "execution_count": null, - "id": "55d169ea", + "id": "d3ac0231", "metadata": {}, "outputs": [], "source": [ @@ -256,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "072a46b3", + "id": "0a102372", "metadata": {}, "outputs": [], "source": [ @@ -293,7 +293,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d2bb0416", + "id": "779f2953", "metadata": {}, "outputs": [], "source": [ @@ -307,7 +307,7 @@ { "cell_type": "code", "execution_count": null, - "id": "af5ac380", + "id": "cb91a331", "metadata": {}, "outputs": [], "source": [ @@ -330,7 +330,7 @@ { "cell_type": "code", "execution_count": null, - "id": "aee38cf6", + "id": "5e9afbef", "metadata": {}, "outputs": [], "source": [ @@ -350,7 
+350,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0b3055fc", + "id": "211cd3bb", "metadata": {}, "outputs": [], "source": [ @@ -365,7 +365,7 @@ { "cell_type": "code", "execution_count": null, - "id": "119b2c28", + "id": "e52a86d8", "metadata": {}, "outputs": [], "source": [ @@ -387,7 +387,7 @@ }, { "cell_type": "markdown", - "id": "d214f42a", + "id": "e6f85ba8", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 0 @@ -401,7 +401,7 @@ { "cell_type": "code", "execution_count": null, - "id": "813b4555", + "id": "ed4d08b9", "metadata": { "lines_to_next_cell": 2 }, @@ -496,18 +496,19 @@ }, { "cell_type": "markdown", - "id": "b93c5b32", + "id": "9b71e916", "metadata": { "cell_marker": "\"\"\"" }, "source": [ - "# Visualize the images over time. This shows the phase images and fluorescence images of the uninfected and infected cells over time." + "## Visualize Images Over Time\n", + "Below we show phase and fluorescence images of the uninfected and infected cells over time." 
] }, { "cell_type": "code", "execution_count": null, - "id": "893fa329", + "id": "a49b837e", "metadata": {}, "outputs": [], "source": [ @@ -706,7 +707,7 @@ }, { "cell_type": "markdown", - "id": "b6056a26", + "id": "85c9e727", "metadata": { "cell_marker": "\"\"\"" }, From 8b54ee0c7c57ad09a29cd039fae4b9d29ba354d6 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Thu, 27 Nov 2025 07:21:04 -0800 Subject: [PATCH 09/14] fixing the links and addressing karina's comments --- .../DynaCLR/vcp_tutorials/quickstart.ipynb | 70 +++++++++---------- examples/DynaCLR/vcp_tutorials/quickstart.py | 52 +++++++------- 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb index 71e747fa4..8ac6b1c32 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -7,9 +7,10 @@ "cell_marker": "\"\"\"" }, "source": [ - "# Quick Start: DynaCLR (Cell Dynamics Contrastive Learning of Representations)\n", + "# Quick Start: DynaCLR\n", + "## Cell Dynamics Contrastive Learning of Representations\n", "\n", - "**Estimated time to complete:** 25 minutes" + "**Estimated time to complete:** 25-30 minutes" ] }, { @@ -19,7 +20,7 @@ "cell_marker": "\"\"\"" }, "source": [ - "# Learning Goals\n", + "## Learning Goals\n", "\n", "* Download the DynaCLR model and run it on an example dataset\n", "* Visualize the learned embeddings" @@ -33,7 +34,7 @@ }, "source": [ "# Prerequisites\n", - "Python>=3.11" + "- Python>=3.11" ] }, { @@ -43,12 +44,12 @@ "cell_marker": "\"\"\"" }, "source": [ - "# Introduction\n", + "## Introduction\n", "\n", - "## Model\n", + "### Model\n", "The DynaCLR model architecture consists of three main components designed to map 3D multi-channel patches of single cells to a temporally regularized embedding space.\n", "\n", - "## Example Dataset\n", + "### Example Dataset\n", "\n", "The A549 example dataset used in this 
quick-start guide contains\n", "quantitative phase and paired fluorescence images of viral sensor reporter.\n", @@ -60,7 +61,7 @@ "Refer to our [preprint](https://arxiv.org/abs/2410.11281) for more details\n", "about how the dataset and model were generated.\n", "\n", - "## User Data\n", + "### User Data\n", "\n", "The DynaCLR-DENV-VS+Ph model only requires label-free (quantitative phase) and fluorescence images for inference.\n", "\n", @@ -76,25 +77,24 @@ "cell_type": "markdown", "id": "cb8339ef", "metadata": { - "cell_marker": "\"\"\"" + "lines_to_next_cell": 0 }, "source": [ - "# Setup\n", + "\"\"\"\n", + "Setup\n", "\n", "The commands below will install the required packages and download the example dataset and model checkpoint.\n", - "It may take a few minutes to download all the files.\n", "\n", - "## Setup Google Colab\n", + "Setup notes:\n", "\n", - "To run this quick-start guide using Google Colab,\n", - "choose the 'T4' GPU runtime from the \"Connect\" dropdown menu\n", - "in the upper-right corner of this notebook for faster execution.\n", + "- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the \"Connect\" dropdown menu in the upper-right corner of this notebook for faster execution.\n", "Using a GPU significantly speeds up running model inference, but CPU compute can also be used.\n", "\n", - "## Setup Local Environment\n", + "- **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. On Windows, the files can be downloaded manually from the URLs.\n", + "On Windows, the files can be downloaded manually from the URLs.\n", "\n", - "The commands below assume a Unix-like shell with `wget` installed.\n", - "On Windows, the files can be downloaded manually from the URLs." 
+ "\"\"\"\n", + "## Install VisCy" ] }, { @@ -116,7 +116,7 @@ "metadata": {}, "outputs": [], "source": [ - "# restart kernel if running in Google Colab\n", + "# Restart kernel if running in Google Colab\n", "if \"get_ipython\" in globals():\n", " session = get_ipython() # noqa: F821\n", " if \"google.colab\" in str(session):\n", @@ -142,11 +142,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Download the example tracks data\n", + "# Estimated download time: 15-20 minutes wall clock time\n", + "#\n", + "# Download the example tracks data (5-8 minutes)\n", "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/\"\n", - "# Download the example registered timelapse data\n", - "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/\"\n", - "# Download the model checkpoint\n", + "# Download the example registered timelapse data (5-10 minutes)\n", + "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test_demo_crop.zarr/\"\n", + "# Download the model checkpoint (3 minutes)\n", "!wget -m -np -nH --cut-dirs=5 \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt\"" ] }, @@ -157,22 +159,12 @@ "cell_marker": "\"\"\"" }, "source": [ - "# Run Model Inference\n", + "## Run Model Inference\n", "\n", "The following code will run inference on a single field of view (FOV) of the example dataset.\n", "This can also be achieved by using the VisCy CLI." 
] }, - { - "cell_type": "code", - "execution_count": null, - "id": "af399514", - "metadata": { - "lines_to_next_cell": 0 - }, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -215,7 +207,7 @@ "# TODO: Set download paths, by default the working directory is used\n", "root_dir = Path(\"\")\n", "# TODO: modify the path to the input dataset\n", - "input_data_path = root_dir / \"registered_test_demo_cropped.zarr\"\n", + "input_data_path = root_dir / \"registered_test_demo_crop.zarr\"\n", "# TODO: modify the path to the track dataset\n", "tracks_path = root_dir / \"track_test.zarr\"\n", "# TODO: modify the path to the model checkpoint\n", @@ -393,7 +385,7 @@ "lines_to_next_cell": 0 }, "source": [ - "# Model Outputs\n", + "## Model Outputs\n", "\n", "The model outputs are also stored in an ANNData. The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA)" ] @@ -712,6 +704,9 @@ "cell_marker": "\"\"\"" }, "source": [ + "## Contact Information\n", + "For issues with this notebook please contact eduardo.hirata@czbiohub.org.\n", + "\n", "## Responsible Use\n", "\n", "We are committed to advancing the responsible development and use of artificial intelligence.\n", @@ -726,6 +721,9 @@ "jupytext": { "cell_metadata_filter": "all", "main_language": "python" + }, + "language_info": { + "name": "python" } }, "nbformat": 4, diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index c71285ffa..3c9533e97 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -1,13 +1,14 @@ # %% [markdown] """ -# Quick Start: DynaCLR (Cell Dynamics Contrastive Learning of Representations) +# Quick Start: DynaCLR +## Cell Dynamics Contrastive Learning of Representations -**Estimated time to complete:** 25 minutes +**Estimated time to complete:** 25-30 minutes """ # %% [markdown] """ -# Learning Goals +## Learning 
Goals * Download the DynaCLR model and run it on an example dataset * Visualize the learned embeddings @@ -16,18 +17,18 @@ # %% [markdown] """ # Prerequisites -Python>=3.11 +- Python>=3.11 """ # %% [markdown] """ -# Introduction +## Introduction -## Model +### Model The DynaCLR model architecture consists of three main components designed to map 3D multi-channel patches of single cells to a temporally regularized embedding space. -## Example Dataset +### Example Dataset The A549 example dataset used in this quick-start guide contains quantitative phase and paired fluorescence images of viral sensor reporter. @@ -39,7 +40,7 @@ Refer to our [preprint](https://arxiv.org/abs/2410.11281) for more details about how the dataset and model were generated. -## User Data +### User Data The DynaCLR-DENV-VS+Ph model only requires label-free (quantitative phase) and fluorescence images for inference. @@ -56,28 +57,24 @@ # Setup The commands below will install the required packages and download the example dataset and model checkpoint. -It may take a few minutes to download all the files. -## Setup Google Colab +Setup notes: -To run this quick-start guide using Google Colab, -choose the 'T4' GPU runtime from the "Connect" dropdown menu -in the upper-right corner of this notebook for faster execution. +- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the "Connect" dropdown menu in the upper-right corner of this notebook for faster execution. Using a GPU significantly speeds up running model inference, but CPU compute can also be used. -## Setup Local Environment - -The commands below assume a Unix-like shell with `wget` installed. +- **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. On Windows, the files can be downloaded manually from the URLs. On Windows, the files can be downloaded manually from the URLs. 
-""" +""" +### Install VisCy # %% # Install VisCy with the optional dependencies for this example # See the [repository](https://github.com/mehta-lab/VisCy) for more details # !pip install "viscy[metrics,visual]==0.4.0a3" # %% -# restart kernel if running in Google Colab +# Restart kernel if running in Google Colab if "get_ipython" in globals(): session = get_ipython() # noqa: F821 if "google.colab" in str(session): @@ -89,16 +86,18 @@ # !viscy --help # %% -# Download the example tracks data +# Estimated download time: 15-20 minutes wall clock time +# +# Download the example tracks data (5-8 minutes) # !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/" -# Download the example registered timelapse data -# !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test.zarr/" -# Download the model checkpoint +# Download the example registered timelapse data (5-10 minutes) +# !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test_demo_crop.zarr/" +# Download the model checkpoint (3 minutes) # !wget -m -np -nH --cut-dirs=5 "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt" # %% [markdown] """ -# Run Model Inference +## Run Model Inference The following code will run inference on a single field of view (FOV) of the example dataset. This can also be achieved by using the VisCy CLI. 
@@ -134,7 +133,7 @@ # TODO: Set download paths, by default the working directory is used root_dir = Path("") # TODO: modify the path to the input dataset -input_data_path = root_dir / "registered_test_demo_cropped.zarr" +input_data_path = root_dir / "registered_test_demo_crop.zarr" # TODO: modify the path to the track dataset tracks_path = root_dir / "track_test.zarr" # TODO: modify the path to the model checkpoint @@ -249,7 +248,7 @@ # %% [markdown] """ -# Model Outputs +## Model Outputs The model outputs are also stored in an ANNData. The embeddings can then be visualized with a dimensionality reduction method (i.e UMAP, PHATE, PCA) """ @@ -542,6 +541,9 @@ def plot_timepoint(t): # %% [markdown] """ +## Contact Information +For issues with this notebook please contact eduardo.hirata@czbiohub.org. + ## Responsible Use We are committed to advancing the responsible development and use of artificial intelligence. From 03d71f0d6ee7630bc515166486252ee590357923 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Fri, 28 Nov 2025 05:55:19 -0800 Subject: [PATCH 10/14] fix prerequisites and double quote comment --- examples/DynaCLR/vcp_tutorials/quickstart.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index 3c9533e97..93b21ac79 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -16,7 +16,7 @@ # %% [markdown] """ -# Prerequisites +## Prerequisites - Python>=3.11 """ @@ -60,11 +60,10 @@ Setup notes: -- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the "Connect" dropdown menu in the upper-right corner of this notebook for faster execution. 
+- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the 'Connect' dropdown menu in the upper-right corner of this notebook for faster execution. Using a GPU significantly speeds up running model inference, but CPU compute can also be used. - **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. On Windows, the files can be downloaded manually from the URLs. -On Windows, the files can be downloaded manually from the URLs. """ ### Install VisCy From 76684e0e5532d868d9e2396b22cb73da5c226f03 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Fri, 28 Nov 2025 06:00:16 -0800 Subject: [PATCH 11/14] add download for the 'extracted_inf_state.csv' --- examples/DynaCLR/vcp_tutorials/quickstart.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index 93b21ac79..bd99a2784 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -93,6 +93,9 @@ # !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test_demo_crop.zarr/" # Download the model checkpoint (3 minutes) # !wget -m -np -nH --cut-dirs=5 "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt" +# Download the annotations for the infected state +# !wget -m -np -nH --cut-dirs=6 "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/extracted_inf_state.csv" + # %% [markdown] """ From ab92b74a51ec52dd6fc54e205795ea0c6f8cf895 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Fri, 28 Nov 2025 06:09:27 -0800 Subject: [PATCH 12/14] fixing formatting for the 'setup' and recompiling the ipynb --- 
.../DynaCLR/vcp_tutorials/quickstart.ipynb | 76 +++++++++---------- examples/DynaCLR/vcp_tutorials/quickstart.py | 6 +- 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb index 8ac6b1c32..dec660145 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "cb32a304", + "id": "36b436bf", "metadata": { "cell_marker": "\"\"\"" }, @@ -15,7 +15,7 @@ }, { "cell_type": "markdown", - "id": "594146e3", + "id": "c002c086", "metadata": { "cell_marker": "\"\"\"" }, @@ -28,18 +28,18 @@ }, { "cell_type": "markdown", - "id": "9c0321c2", + "id": "2ca8c339", "metadata": { "cell_marker": "\"\"\"" }, "source": [ - "# Prerequisites\n", + "## Prerequisites\n", "- Python>=3.11" ] }, { "cell_type": "markdown", - "id": "30f4e5b8", + "id": "1818081a", "metadata": { "cell_marker": "\"\"\"" }, @@ -75,32 +75,30 @@ }, { "cell_type": "markdown", - "id": "cb8339ef", + "id": "ad63eb9e", "metadata": { + "cell_marker": "\"\"\"", "lines_to_next_cell": 0 }, "source": [ - "\"\"\"\n", - "Setup\n", + "### Setup\n", "\n", "The commands below will install the required packages and download the example dataset and model checkpoint.\n", "\n", "Setup notes:\n", "\n", - "- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the \"Connect\" dropdown menu in the upper-right corner of this notebook for faster execution.\n", + "- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the 'Connect' dropdown menu in the upper-right corner of this notebook for faster execution.\n", "Using a GPU significantly speeds up running model inference, but CPU compute can also be used.\n", "\n", "- **Setting up local environment**: The commands below assume a Unix-like shell with `wget` 
installed. On Windows, the files can be downloaded manually from the URLs.\n", - "On Windows, the files can be downloaded manually from the URLs.\n", "\n", - "\"\"\"\n", - "## Install VisCy" + "### Install VisCy" ] }, { "cell_type": "code", "execution_count": null, - "id": "e676e581", + "id": "69b3b31b", "metadata": {}, "outputs": [], "source": [ @@ -112,7 +110,7 @@ { "cell_type": "code", "execution_count": null, - "id": "650827c5", + "id": "d860546d", "metadata": {}, "outputs": [], "source": [ @@ -127,7 +125,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df5a8ee6", + "id": "8ea0587b", "metadata": {}, "outputs": [], "source": [ @@ -138,8 +136,10 @@ { "cell_type": "code", "execution_count": null, - "id": "a6126ba4", - "metadata": {}, + "id": "6dec2a9e", + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [], "source": [ "# Estimated download time: 15-20 minutes wall clock time\n", @@ -149,14 +149,17 @@ "# Download the example registered timelapse data (5-10 minutes)\n", "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/registered_test_demo_crop.zarr/\"\n", "# Download the model checkpoint (3 minutes)\n", - "!wget -m -np -nH --cut-dirs=5 \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt\"" + "!wget -m -np -nH --cut-dirs=5 \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_models/DynaCLR-DENV/VS_n_Ph/epoch=94-step=2375.ckpt\"\n", + "# Download the annotations for the infected state\n", + "!wget -m -np -nH --cut-dirs=6 \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/extracted_inf_state.csv\"" ] }, { "cell_type": "markdown", - "id": "dcbb752d", + "id": "dc74d3e7", "metadata": { - "cell_marker": "\"\"\"" + "cell_marker": "\"\"\"", + "lines_to_next_cell": 0 }, "source": [ "## Run Model 
Inference\n", @@ -168,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f18f90ce", + "id": "7c5bbe59", "metadata": {}, "outputs": [], "source": [ @@ -197,7 +200,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9fc31403", + "id": "f2764122", "metadata": {}, "outputs": [], "source": [ @@ -222,7 +225,7 @@ { "cell_type": "code", "execution_count": null, - "id": "269b852e", + "id": "86121d5a", "metadata": {}, "outputs": [], "source": [ @@ -235,7 +238,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d3ac0231", + "id": "afd7a12e", "metadata": {}, "outputs": [], "source": [ @@ -248,7 +251,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0a102372", + "id": "bd1a8063", "metadata": {}, "outputs": [], "source": [ @@ -285,7 +288,7 @@ { "cell_type": "code", "execution_count": null, - "id": "779f2953", + "id": "6d6960dc", "metadata": {}, "outputs": [], "source": [ @@ -299,7 +302,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cb91a331", + "id": "886229f2", "metadata": {}, "outputs": [], "source": [ @@ -322,7 +325,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e9afbef", + "id": "892b5385", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +345,7 @@ { "cell_type": "code", "execution_count": null, - "id": "211cd3bb", + "id": "c1cc8edb", "metadata": {}, "outputs": [], "source": [ @@ -357,7 +360,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e52a86d8", + "id": "4477cd99", "metadata": {}, "outputs": [], "source": [ @@ -379,7 +382,7 @@ }, { "cell_type": "markdown", - "id": "e6f85ba8", + "id": "0b3f7a24", "metadata": { "cell_marker": "\"\"\"", "lines_to_next_cell": 0 @@ -393,7 +396,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ed4d08b9", + "id": "907fe5df", "metadata": { "lines_to_next_cell": 2 }, @@ -488,7 +491,7 @@ }, { "cell_type": "markdown", - "id": "9b71e916", + "id": "5c107401", "metadata": { "cell_marker": "\"\"\"" }, @@ -500,7 +503,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "a49b837e", + "id": "934fcb12", "metadata": {}, "outputs": [], "source": [ @@ -699,7 +702,7 @@ }, { "cell_type": "markdown", - "id": "85c9e727", + "id": "85de10d5", "metadata": { "cell_marker": "\"\"\"" }, @@ -721,9 +724,6 @@ "jupytext": { "cell_metadata_filter": "all", "main_language": "python" - }, - "language_info": { - "name": "python" } }, "nbformat": 4, diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index bd99a2784..ab629233b 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -54,7 +54,7 @@ # %% [markdown] """ -# Setup +### Setup The commands below will install the required packages and download the example dataset and model checkpoint. @@ -65,8 +65,8 @@ - **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. On Windows, the files can be downloaded manually from the URLs. -""" ### Install VisCy +""" # %% # Install VisCy with the optional dependencies for this example # See the [repository](https://github.com/mehta-lab/VisCy) for more details @@ -104,8 +104,6 @@ The following code will run inference on a single field of view (FOV) of the example dataset. This can also be achieved by using the VisCy CLI. 
""" - -# %% # %% from pathlib import Path # noqa: E402 From 3a71560b64171861bc242a67ef418d7b10bc8da1 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Fri, 28 Nov 2025 06:14:14 -0800 Subject: [PATCH 13/14] adding download time header 3 for Karina's comment --- examples/DynaCLR/vcp_tutorials/quickstart.ipynb | 14 ++++++++++++-- examples/DynaCLR/vcp_tutorials/quickstart.py | 7 +++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb index dec660145..2c9c81753 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -133,6 +133,18 @@ "!viscy --help" ] }, + { + "cell_type": "markdown", + "id": "98cdb574", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 0 + }, + "source": [ + "### Download example data and model checkpoint\n", + "Estimated download time: 15-20 minutes wall clock time" + ] + }, { "cell_type": "code", "execution_count": null, @@ -142,8 +154,6 @@ }, "outputs": [], "source": [ - "# Estimated download time: 15-20 minutes wall clock time\n", - "#\n", "# Download the example tracks data (5-8 minutes)\n", "!wget -m -np -nH --cut-dirs=6 -R \"index.html*\" \"https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/\"\n", "# Download the example registered timelapse data (5-10 minutes)\n", diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index ab629233b..c26ff84e5 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -84,9 +84,12 @@ # Validate installation # !viscy --help +# %% [markdown] +""" +### Download example data and model checkpoint +Estimated download time: 15-20 minutes +""" # %% -# Estimated download time: 15-20 minutes wall clock time -# # Download the example tracks data (5-8 
minutes) # !wget -m -np -nH --cut-dirs=6 -R "index.html*" "https://public.czbiohub.org/comp.micro/viscy/DynaCLR_data/DENV/test/20240204_A549_DENV_ZIKV_timelapse/track_test.zarr/" # Download the example registered timelapse data (5-10 minutes) From 272826953dcfcebc69a649efb229400d9a516166 Mon Sep 17 00:00:00 2001 From: Eduardo Hirata-Miyasaki Date: Fri, 28 Nov 2025 18:40:42 -0800 Subject: [PATCH 14/14] Colab setup note and the viscy phate dependency --- examples/DynaCLR/vcp_tutorials/quickstart.ipynb | 6 ++++-- examples/DynaCLR/vcp_tutorials/quickstart.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb index 2c9c81753..642825e26 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.ipynb +++ b/examples/DynaCLR/vcp_tutorials/quickstart.ipynb @@ -90,6 +90,8 @@ "- **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the 'Connect' dropdown menu in the upper-right corner of this notebook for faster execution.\n", "Using a GPU significantly speeds up running model inference, but CPU compute can also be used.\n", "\n", + "- **Google Colab Kaggle prompt**: When running `datamodule.setup(\"predict\")`, Colab may prompt for Kaggle credentials. This is a Colab-specific behavior triggered by certain file I/O patterns and can be safely dismissed by clicking \"Cancel\" - no Kaggle account is required for this tutorial.\n", + "\n", "- **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. 
On Windows, the files can be downloaded manually from the URLs.\n", "\n", "### Install VisCy" @@ -104,7 +106,7 @@ "source": [ "# Install VisCy with the optional dependencies for this example\n", "# See the [repository](https://github.com/mehta-lab/VisCy) for more details\n", - "!pip install \"viscy[metrics,visual]==0.4.0a3\"" + "!pip install \"viscy[metrics,visual,phate]==0.4.0a3\"" ] }, { @@ -142,7 +144,7 @@ }, "source": [ "### Download example data and model checkpoint\n", - "Estimated download time: 15-20 minutes wall clock time" + "Estimated download time: 15-20 minutes" ] }, { diff --git a/examples/DynaCLR/vcp_tutorials/quickstart.py b/examples/DynaCLR/vcp_tutorials/quickstart.py index c26ff84e5..8b8f53ce0 100644 --- a/examples/DynaCLR/vcp_tutorials/quickstart.py +++ b/examples/DynaCLR/vcp_tutorials/quickstart.py @@ -63,6 +63,8 @@ - **Setting up Google Colab**: To run this quickstart guide using Google Colab, choose the 'T4' GPU runtime from the 'Connect' dropdown menu in the upper-right corner of this notebook for faster execution. Using a GPU significantly speeds up running model inference, but CPU compute can also be used. +- **Google Colab Kaggle prompt**: When running `datamodule.setup("predict")`, Colab may prompt for Kaggle credentials. This is a Colab-specific behavior triggered by certain file I/O patterns and can be safely dismissed by clicking "Cancel" - no Kaggle account is required for this tutorial. + - **Setting up local environment**: The commands below assume a Unix-like shell with `wget` installed. On Windows, the files can be downloaded manually from the URLs. ### Install VisCy @@ -70,7 +72,7 @@ # %% # Install VisCy with the optional dependencies for this example # See the [repository](https://github.com/mehta-lab/VisCy) for more details -# !pip install "viscy[metrics,visual]==0.4.0a3" +# !pip install "viscy[metrics,visual,phate]==0.4.0a3" # %% # Restart kernel if running in Google Colab