diff --git a/tedana/workflows/__init__.py b/tedana/workflows/__init__.py index 04b284826..cd2eb480b 100644 --- a/tedana/workflows/__init__.py +++ b/tedana/workflows/__init__.py @@ -5,4 +5,8 @@ from tedana.workflows.t2smap import t2smap_workflow from tedana.workflows.tedana import tedana_workflow -__all__ = ["tedana_workflow", "t2smap_workflow", "ica_reclassify_workflow"] +__all__ = [ + "tedana_workflow", + "t2smap_workflow", + "ica_reclassify_workflow", +] diff --git a/tedana/workflows/ica_reclassify.py b/tedana/workflows/ica_reclassify.py index a7febdb39..b0d39d190 100644 --- a/tedana/workflows/ica_reclassify.py +++ b/tedana/workflows/ica_reclassify.py @@ -1,21 +1,31 @@ """Run the reclassification workflow for a previous tedana run.""" import argparse -import datetime import logging import os import os.path as op import sys -from glob import glob import numpy as np import pandas as pd -import tedana.gscontrol as gsc -from tedana import __version__, io, reporting, selection, utils -from tedana.bibtex import get_description_references +from tedana import io, reporting, selection, utils from tedana.io import ALLOWED_COMPONENT_DELIMITERS from tedana.workflows.parser_utils import parse_manual_list_int, parse_manual_list_str +from tedana.workflows.shared import ( + apply_mir, + apply_tedort, + finalize_report_text, + generate_dynamic_report, + generate_reclassify_figures, + rename_previous_reports, + save_derivative_metadata, + save_workflow_command, + setup_logging, + teardown_workflow, + write_denoised_results, + write_echo_results, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -312,26 +322,10 @@ def ica_reclassify_workflow( if not op.isdir(out_dir): os.mkdir(out_dir) - # boilerplate + # Setup boilerplate prefix = io._infer_prefix(prefix) - basename = f"{prefix}report" - extension = "txt" - repname = op.join(out_dir, (basename + "." + extension)) - bibtex_file = op.join(out_dir, f"{prefix}references.bib") - repex = op.join(out_dir, (basename + "*")) - previousreps = glob(repex) - previousreps.sort(reverse=True) - for f in previousreps: - previousparts = op.splitext(f) - newname = previousparts[0] + "_old" + previousparts[1] - os.rename(f, newname) - - # create logfile name - basename = "tedana_" - extension = "tsv" - start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") - logname = op.join(out_dir, (basename + start_time + "." 
+ extension)) - utils.setup_loggers(logname=logname, repname=repname, quiet=quiet, debug=debug) + repname, bibtex_file = rename_previous_reports(out_dir, prefix) + setup_logging(out_dir, repname, quiet, debug) # Coerce gscontrol to list if not isinstance(gscontrol, list): @@ -371,15 +365,12 @@ def ica_reclassify_workflow( raise ValueError(f"The following components were both accepted and rejected: {in_both}") # Save command into sh file, if the command-line interface was used - # TODO: use io_generator to save command if reclassify_command is not None: - command_file = open(os.path.join(out_dir, "ica_reclassify_call.sh"), "w") - command_file.write(reclassify_command) - command_file.close() + save_workflow_command(out_dir, reclassify_command, "ica_reclassify_call.sh") else: - # Get variables passed to function if the tedana command is None + # Get variables passed to function if the reclassify command is None variables = ", ".join(f"{name}={value}" for name, value in locals().items()) - # From variables, remove everything after ", tedana_command" + # From variables, remove everything after ", reclassify_command" variables = variables.split(", reclassify_command")[0] reclassify_command = f"ica_reclassify_workflow({variables})" @@ -476,40 +467,21 @@ def ica_reclassify_workflow( ) mixing_orig = mixing.copy() - # TODO: make this a function if tedort: - comps_accepted = selector.accepted_comps_ - comps_rejected = selector.rejected_comps_ - acc_ts = mixing[:, comps_accepted] - rej_ts = mixing[:, comps_rejected] - betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] - pred_rej_ts = np.dot(acc_ts, betas) - resid = rej_ts - pred_rej_ts - mixing[:, comps_rejected] = resid + mixing = apply_tedort(mixing, selector.accepted_comps_, selector.rejected_comps_) comp_names = [ io.add_decomp_prefix(comp, prefix="ica", max_value=component_table.index.max()) for comp in range(selector.n_comps_) ] mixing_df = pd.DataFrame(data=mixing, columns=comp_names) io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv") - RepLGR.info( - "Rejected components' time series were then " - "orthogonalized with respect to accepted components' time " - "series." - ) # img_t_r = io_generator.reference_img.header.get_zooms()[-1] adaptive_mask = utils.reshape_niimg(adaptive_mask) mask_denoise = adaptive_mask >= 1 data_optcom = utils.reshape_niimg(data_optcom) - # TODO: make a better result-writing function - # #############################################!!!! - # TODO: make a better time series creation function - # - get_ts_fit_tag(include=[], exclude=[]) - # - get_ts_regress/residual_tag(include=[], exclude=[]) - # How to handle [acc/rej] + tag ? 
- io.writeresults( + write_denoised_results( data_optcom, mask=mask_denoise, component_table=component_table, @@ -519,7 +491,7 @@ def ica_reclassify_workflow( if "mir" in gscontrol: io_generator.overwrite = True - gsc.minimum_image_regression( + apply_mir( data_optcom=data_optcom, mixing=mixing, mask=mask_denoise, @@ -531,84 +503,38 @@ def ica_reclassify_workflow( if verbose: LGR.debug("Writing out verbose data") - io.writeresults_echoes(data_cat, mixing, mask_denoise, component_table, io_generator) + write_echo_results(data_cat, mixing, mask_denoise, component_table, io_generator) # Write out BIDS-compatible description file - derivative_metadata = { - "Name": "tedana Outputs", - "BIDSVersion": "1.5.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "ica_reclassify", - "Version": __version__, - "Description": ( - "A denoising pipeline for the identification and removal " - "of non-BOLD noise from multi-echo fMRI data." - ), - "CodeURL": "https://github.com/ME-ICA/tedana", - "Node": { - "Name": info_dict["Node"], - "System": info_dict["System"], - "Machine": info_dict["Machine"], - "Processor": info_dict["Processor"], - "Release": info_dict["Release"], - "Version": info_dict["Version"], - }, - "Python": info_dict["Python"], - "Python_Libraries": info_dict["Python_Libraries"], - "Command": info_dict["Command"], - } - ], - } - io_generator.save_file(derivative_metadata, "data description json") - - with open(repname) as fo: - report = [line.rstrip() for line in fo.readlines()] - report = " ".join(report) - - with open(repname, "w") as fo: - fo.write(report) - - # Collect BibTeX entries for cited papers - references = get_description_references(report) - - with open(bibtex_file, "w") as fo: - fo.write(references) - - if not no_reports: - LGR.info("Making figures folder with static component maps and timecourse plots.") + save_derivative_metadata( + io_generator, + info_dict, + workflow_name="ica_reclassify", + workflow_description=( + "A denoising pipeline for the identification and removal " + "of non-BOLD noise from multi-echo fMRI data." + ), + ) - dn_ts, hikts, lowkts = io.denoise_ts(data_optcom, mixing, mask_denoise, component_table) + # Finalize report text and write BibTeX references + finalize_report_text(repname, bibtex_file) - reporting.static_figures.carpet_plot( - optcom_ts=data_optcom, - denoised_ts=dn_ts, - hikts=hikts, - lowkts=lowkts, - mask=mask_denoise, - io_generator=io_generator, - gscontrol=gscontrol, - ) - reporting.static_figures.comp_figures( - data_optcom, - mask=mask_denoise, + if not no_reports: + generate_reclassify_figures( + data_optcom=data_optcom, + mask_denoise=mask_denoise, component_table=component_table, mixing=mixing_orig, io_generator=io_generator, png_cmap=png_cmap, + gscontrol=gscontrol, ) - - LGR.info("Generating dynamic report") - reporting.generate_report(io_generator, cluster_labels=None, similarity_t_sne=None) + generate_dynamic_report(io_generator) io_generator.save_self() LGR.info("Workflow completed") - # Add newsletter info to the log - utils.log_newsletter_info() - - utils.teardown_loggers() + teardown_workflow() if __name__ == "__main__": diff --git a/tedana/workflows/shared/__init__.py b/tedana/workflows/shared/__init__.py new file mode 100644 index 000000000..e5cb9b4ea --- /dev/null +++ b/tedana/workflows/shared/__init__.py @@ -0,0 +1,83 @@ +"""Shared modules for tedana workflows. + +This package provides reusable components that are shared across +the tedana, t2smap, and ica_reclassify workflows. 
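+
+Examples
+--------
+A sketch of how a workflow composes these helpers; the output
+directory and prefix below are illustrative, not defaults:
+
+>>> from tedana.workflows import shared
+>>> out_dir = shared.setup_output_directory("tedana_out")  # doctest: +SKIP
+>>> repname, bibtex_file = shared.rename_previous_reports(out_dir, prefix="")  # doctest: +SKIP
+>>> logname = shared.setup_logging(out_dir, repname, quiet=False, debug=False)  # doctest: +SKIP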
+""" + +from tedana.workflows.shared.combination import ( + compute_optimal_combination, + compute_optimal_combination_simple, +) +from tedana.workflows.shared.containers import ( + DecayMaps, + DecompositionResult, + MaskData, + MultiEchoData, + OptcomData, + WorkflowConfig, +) +from tedana.workflows.shared.data_loading import load_multiecho_data, validate_tr +from tedana.workflows.shared.fitting import fit_decay_model, fit_decay_model_simple +from tedana.workflows.shared.masking import ( + create_adaptive_masks, + create_simple_adaptive_mask, +) +from tedana.workflows.shared.output import ( + apply_mir, + apply_tedort, + finalize_report_text, + save_derivative_metadata, + write_denoised_results, + write_echo_results, +) +from tedana.workflows.shared.reporting import ( + generate_dynamic_report, + generate_reclassify_figures, + generate_static_figures, +) +from tedana.workflows.shared.setup import ( + rename_previous_reports, + save_workflow_command, + setup_logging, + setup_output_directory, + teardown_workflow, +) + +__all__ = [ + # Containers + "WorkflowConfig", + "MultiEchoData", + "MaskData", + "DecayMaps", + "OptcomData", + "DecompositionResult", + # Setup + "setup_output_directory", + "setup_logging", + "save_workflow_command", + "rename_previous_reports", + "teardown_workflow", + # Data loading + "load_multiecho_data", + "validate_tr", + # Masking + "create_adaptive_masks", + "create_simple_adaptive_mask", + # Fitting + "fit_decay_model", + "fit_decay_model_simple", + # Combination + "compute_optimal_combination", + "compute_optimal_combination_simple", + # Output + "apply_tedort", + "write_denoised_results", + "apply_mir", + "write_echo_results", + "save_derivative_metadata", + "finalize_report_text", + # Reporting + "generate_static_figures", + "generate_dynamic_report", + "generate_reclassify_figures", +] diff --git a/tedana/workflows/shared/combination.py b/tedana/workflows/shared/combination.py new file mode 100644 index 000000000..7a8ae0781 --- /dev/null +++ b/tedana/workflows/shared/combination.py @@ -0,0 +1,113 @@ +"""Optimal combination utilities for tedana workflows. + +This module provides functions for optimally combining multi-echo data. +""" + +import logging +from typing import Any, List, Optional + +import numpy as np + +import tedana.gscontrol as gsc +from tedana import combine +from tedana.workflows.shared.containers import ( + DecayMaps, + MaskData, + MultiEchoData, + OptcomData, +) + +LGR = logging.getLogger("GENERAL") + + +def compute_optimal_combination( + data: MultiEchoData, + masks: MaskData, + decay_maps: DecayMaps, + combmode: str, + io_generator: Any, + gscontrol: Optional[List[str]] = None, +) -> OptcomData: + """Compute optimally combined data. + + Parameters + ---------- + data : MultiEchoData + Loaded multi-echo data. + masks : MaskData + Mask data. + decay_maps : DecayMaps + T2*/S0 maps from decay fitting. + combmode : str + Combination method (currently only 't2s'). + io_generator : OutputGenerator + Output generator for saving files. + gscontrol : list of str, optional + Global signal control methods. Default is None. + + Returns + ------- + OptcomData + Container with optimally combined data. + + Notes + ----- + If 'gsr' is in gscontrol, global signal regression is applied + to both the multi-echo data and the optimally combined data. 
+ """ + if gscontrol is None: + gscontrol = [] + + data_optcom = combine.make_optcom( + data.data_cat, + data.tes, + masks.masksum_denoise, + t2s=decay_maps.t2s_full, + combmode=combmode, + ) + + # Apply global signal regression if requested + if "gsr" in gscontrol: + # Note: This modifies data.data_cat in place + data.data_cat, data_optcom = gsc.gscontrol_raw( + data_cat=data.data_cat, + data_optcom=data_optcom, + n_echos=data.n_echos, + io_generator=io_generator, + ) + + fout = io_generator.save_file(data_optcom, "combined img") + LGR.info(f"Writing optimally combined data set: {fout}") + + return OptcomData(data_optcom=data_optcom) + + +def compute_optimal_combination_simple( + data_cat: np.ndarray, + tes: List[float], + masksum: np.ndarray, + t2s: np.ndarray, + combmode: str, +) -> np.ndarray: + """Compute optimally combined data without saving (for t2smap workflow). + + Parameters + ---------- + data_cat : np.ndarray + Multi-echo data array (S x E x T). + tes : list of float + Echo times in milliseconds. + masksum : np.ndarray + Masksum array. + t2s : np.ndarray + T2* map. + combmode : str + Combination method. + + Returns + ------- + np.ndarray + Optimally combined data. + """ + LGR.info("Computing optimal combination") + return combine.make_optcom(data_cat, tes, masksum, t2s=t2s, combmode=combmode) diff --git a/tedana/workflows/shared/containers.py b/tedana/workflows/shared/containers.py new file mode 100644 index 000000000..4d2ead82f --- /dev/null +++ b/tedana/workflows/shared/containers.py @@ -0,0 +1,262 @@ +"""Data containers for tedana workflows. + +This module provides focused dataclasses for passing data between +workflow stages. Each container holds related data and provides +a clear interface for what each stage needs and produces. +""" + +from dataclasses import dataclass, field +from typing import Any, List, Optional + +import numpy as np +import pandas as pd + + +@dataclass +class WorkflowConfig: + """Common workflow configuration parameters. + + Parameters + ---------- + out_dir : str + Absolute path to output directory. + prefix : str + Prefix for output filenames. + convention : str + Filenaming convention ('bids' or 'orig'). + verbose : bool + Whether to generate verbose output. + debug : bool + Whether to run in debug mode. + quiet : bool + Whether to suppress logging. + overwrite : bool + Whether to overwrite existing files. + """ + + out_dir: str + prefix: str + convention: str = "bids" + verbose: bool = False + debug: bool = False + quiet: bool = False + overwrite: bool = False + + +@dataclass +class MultiEchoData: + """Container for loaded multi-echo fMRI data. + + Parameters + ---------- + data_cat : np.ndarray + Concatenated multi-echo data array with shape (S x E x T), + where S is samples (voxels), E is echoes, T is timepoints. + ref_img : nibabel image + Reference image for output generation. + tes : list of float + Echo times in milliseconds. + n_samp : int + Number of samples (voxels). + n_echos : int + Number of echoes. + n_vols : int + Number of volumes (timepoints). + """ + + data_cat: np.ndarray = field(repr=False) + ref_img: Any = field(repr=False) + tes: List[float] + n_samp: int + n_echos: int + n_vols: int + + @classmethod + def from_data_array( + cls, data_cat: np.ndarray, ref_img: Any, tes: List[float] + ) -> "MultiEchoData": + """Create MultiEchoData from a data array. + + Parameters + ---------- + data_cat : np.ndarray + Concatenated multi-echo data (S x E x T). + ref_img : nibabel image + Reference image. 
+ tes : list of float + Echo times in milliseconds. + + Returns + ------- + MultiEchoData + Initialized container. + """ + n_samp, n_echos, n_vols = data_cat.shape + return cls( + data_cat=data_cat, + ref_img=ref_img, + tes=tes, + n_samp=n_samp, + n_echos=n_echos, + n_vols=n_vols, + ) + + +@dataclass +class MaskData: + """Container for mask arrays. + + Parameters + ---------- + base_mask : np.ndarray + Initial binary mask (from user or computed). + mask_denoise : np.ndarray + Liberal mask for denoising (voxels with >= 1 good echo). + mask_clf : np.ndarray + Conservative mask for classification (voxels with >= 3 good echoes). + masksum_denoise : np.ndarray + Per-voxel count of good echoes for denoising mask. + masksum_clf : np.ndarray + Per-voxel count of good echoes for classification mask. + """ + + base_mask: np.ndarray = field(repr=False) + mask_denoise: np.ndarray = field(repr=False) + mask_clf: np.ndarray = field(repr=False) + masksum_denoise: np.ndarray = field(repr=False) + masksum_clf: np.ndarray = field(repr=False) + + @property + def n_voxels_denoise(self) -> int: + """Number of voxels in denoising mask.""" + return int(self.mask_denoise.sum()) + + @property + def n_voxels_clf(self) -> int: + """Number of voxels in classification mask.""" + return int(self.mask_clf.sum()) + + +@dataclass +class DecayMaps: + """Container for T2* and S0 maps. + + Parameters + ---------- + t2s_limited : np.ndarray + T2* map limited to mask_denoise voxels (in milliseconds). + t2s_full : np.ndarray + Full T2* map with extrapolation for dropout voxels (in milliseconds). + s0_limited : np.ndarray + S0 map limited to mask_denoise voxels. + s0_full : np.ndarray + Full S0 map with extrapolation for dropout voxels. + """ + + t2s_limited: np.ndarray = field(repr=False) + t2s_full: np.ndarray = field(repr=False) + s0_limited: np.ndarray = field(repr=False) + s0_full: np.ndarray = field(repr=False) + + +@dataclass +class OptcomData: + """Container for optimally combined data. + + Parameters + ---------- + data_optcom : np.ndarray + Optimally combined timeseries with shape (S x T). + """ + + data_optcom: np.ndarray = field(repr=False) + + @property + def n_samp(self) -> int: + """Number of samples (voxels).""" + return self.data_optcom.shape[0] + + @property + def n_vols(self) -> int: + """Number of volumes (timepoints).""" + return self.data_optcom.shape[1] + + +@dataclass +class DecompositionResult: + """Container for PCA/ICA decomposition results. + + Parameters + ---------- + mixing : np.ndarray + ICA mixing matrix with shape (T x C), where T is timepoints + and C is components. + n_components : int + Number of components. + data_reduced : np.ndarray, optional + PCA-reduced data for ICA input. + cluster_labels : np.ndarray, optional + Cluster labels from robustica (if used). + similarity_t_sne : np.ndarray, optional + t-SNE similarity from robustica (if used). + convergence_warning_count : int, optional + Number of convergence warnings from FastICA. + index_quality : float, optional + Mean index quality from robustica. 
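+
+    Examples
+    --------
+    A minimal sketch with random data standing in for a real ICA result:
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(0)
+    >>> result = DecompositionResult(mixing=rng.standard_normal((200, 30)), n_components=30)
+    >>> result.n_vols
+    200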
+ """ + + mixing: np.ndarray = field(repr=False) + n_components: int + data_reduced: Optional[np.ndarray] = field(default=None, repr=False) + cluster_labels: Optional[np.ndarray] = field(default=None, repr=False) + similarity_t_sne: Optional[np.ndarray] = field(default=None, repr=False) + convergence_warning_count: Optional[int] = None + index_quality: Optional[float] = None + + @property + def n_vols(self) -> int: + """Number of volumes (timepoints).""" + return self.mixing.shape[0] + + +@dataclass +class SelectionResult: + """Container for component selection results. + + Parameters + ---------- + selector : ComponentSelector + Component selector object with selection results. + component_table : pd.DataFrame + Component metrics table with classifications. + n_accepted : int + Number of accepted components. + n_rejected : int + Number of rejected components. + """ + + selector: Any + component_table: pd.DataFrame = field(repr=False) + n_accepted: int + n_rejected: int + + @classmethod + def from_selector(cls, selector: Any) -> "SelectionResult": + """Create SelectionResult from a ComponentSelector. + + Parameters + ---------- + selector : ComponentSelector + Selector after running selection. + + Returns + ------- + SelectionResult + Initialized container. + """ + return cls( + selector=selector, + component_table=selector.component_table_, + n_accepted=selector.n_accepted_comps_, + n_rejected=selector.n_rejected_comps_, + ) diff --git a/tedana/workflows/shared/data_loading.py b/tedana/workflows/shared/data_loading.py new file mode 100644 index 000000000..ffb6d414d --- /dev/null +++ b/tedana/workflows/shared/data_loading.py @@ -0,0 +1,90 @@ +"""Data loading utilities for tedana workflows. + +This module provides functions for loading multi-echo fMRI data. +""" + +import logging +from typing import List, Union + +from tedana import io, utils +from tedana.workflows.shared.containers import MultiEchoData + +LGR = logging.getLogger("GENERAL") + + +def load_multiecho_data( + data: Union[str, List[str]], + tes: List[float], + dummy_scans: int = 0, +) -> MultiEchoData: + """Load and validate multi-echo fMRI data. + + Parameters + ---------- + data : str or list of str + Either a single z-concatenated file or a list of echo-specific files. + tes : list of float + Echo times in milliseconds. + dummy_scans : int, optional + Number of dummy scans to remove. Default is 0. + + Returns + ------- + MultiEchoData + Container with loaded data. + """ + # Ensure tes are floats and validate + tes = [float(te) for te in tes] + tes = utils.check_te_values(tes) + n_echos = len(tes) + + # Ensure data is a list + if isinstance(data, str): + data = [data] + + LGR.info(f"Loading input data: {[f for f in data]}") + data_cat, ref_img = io.load_data(data, n_echos=n_echos, dummy_scans=dummy_scans) + + n_samp, n_echos_loaded, n_vols = data_cat.shape + LGR.debug(f"Resulting data shape: {data_cat.shape}") + + return MultiEchoData( + data_cat=data_cat, + ref_img=ref_img, + tes=tes, + n_samp=n_samp, + n_echos=n_echos_loaded, + n_vols=n_vols, + ) + + +def validate_tr(ref_img) -> float: + """Validate that TR is non-zero and return it. + + Parameters + ---------- + ref_img : nibabel image + Reference image to get TR from. + + Returns + ------- + float + TR value from image header. + + Raises + ------ + OSError + If TR is 0. + """ + img_t_r = ref_img.header.get_zooms()[-1] + if img_t_r == 0: + raise OSError( + "Dataset has a TR of 0. This indicates incorrect" + " header information. 
To correct this, we recommend" + " using this snippet:" + "\n" + "https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6" + "\n" + "to correct your TR to the value it should be." + ) + return img_t_r diff --git a/tedana/workflows/shared/fitting.py b/tedana/workflows/shared/fitting.py new file mode 100644 index 000000000..3933e7e44 --- /dev/null +++ b/tedana/workflows/shared/fitting.py @@ -0,0 +1,133 @@ +"""T2*/S0 decay fitting utilities for tedana workflows. + +This module provides functions for fitting the T2* decay model. +""" + +import logging +from typing import Any + +from scipy import stats + +from tedana import decay, utils +from tedana.workflows.shared.containers import DecayMaps, MaskData, MultiEchoData + +LGR = logging.getLogger("GENERAL") + + +def fit_decay_model( + data: MultiEchoData, + masks: MaskData, + fittype: str, + io_generator: Any, + verbose: bool = False, + n_threads: int = 1, +) -> DecayMaps: + """Fit T2*/S0 decay model and save outputs. + + Parameters + ---------- + data : MultiEchoData + Loaded multi-echo data. + masks : MaskData + Mask data for fitting. + fittype : str + Fitting method ('loglin' or 'curvefit'). + io_generator : OutputGenerator + Output generator for saving files. + verbose : bool, optional + Whether to save verbose outputs. Default is False. + n_threads : int, optional + Number of threads to use for parallel processing. Default is 1. + + Returns + ------- + DecayMaps + Container with T2* and S0 maps. + """ + LGR.info("Computing T2* map") + t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay( + data=data.data_cat, + tes=data.tes, + mask=masks.mask_denoise, + adaptive_mask=masks.masksum_denoise, + fittype=fittype, + n_threads=n_threads, + ) + + # Set a hard cap for the T2* map + # Anything 10x higher than the 99.5 percentile is reset + cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method="lower") + LGR.debug(f"Setting cap on T2* map at {utils.millisec2sec(cap_t2s):.5f}s") + t2s_full[t2s_full > cap_t2s * 10] = cap_t2s + + # Save outputs + io_generator.save_file(utils.millisec2sec(t2s_full), "t2star img") + io_generator.save_file(s0_full, "s0 img") + + if verbose: + io_generator.save_file(utils.millisec2sec(t2s_limited), "limited t2star img") + io_generator.save_file(s0_limited, "limited s0 img") + + # Calculate RMSE + rmse_map, rmse_df = decay.rmse_of_fit_decay_ts( + data=data.data_cat, + tes=data.tes, + adaptive_mask=masks.masksum_denoise, + t2s=t2s_limited, + s0=s0_limited, + fitmode="all", + ) + io_generator.save_file(rmse_map, "rmse img") + io_generator.add_df_to_file(rmse_df, "confounds tsv") + + return DecayMaps( + t2s_limited=t2s_limited, + t2s_full=t2s_full, + s0_limited=s0_limited, + s0_full=s0_full, + ) + + +def fit_decay_model_simple( + data_cat, + tes, + mask, + masksum, + fittype: str, + fitmode: str = "all", + n_threads: int = 1, +): + """Fit T2*/S0 decay model without saving (for t2smap workflow). + + Parameters + ---------- + data_cat : np.ndarray + Multi-echo data array (S x E x T). + tes : list of float + Echo times in milliseconds. + mask : np.ndarray + Mask array. + masksum : np.ndarray + Masksum array. + fittype : str + Fitting method ('loglin' or 'curvefit'). + fitmode : str, optional + Fitting mode ('all' or 'ts'). Default is 'all'. + n_threads : int, optional + Number of threads to use for parallel processing. Default is 1. + + Returns + ------- + tuple + Tuple of (t2s_limited, s0_limited, t2s_full, s0_full). 
+ """ + LGR.info("Computing adaptive T2* map") + decay_function = decay.fit_decay if fitmode == "all" else decay.fit_decay_ts + return decay_function( + data=data_cat, + tes=tes, + mask=mask, + adaptive_mask=masksum, + fittype=fittype, + n_threads=n_threads, + ) diff --git a/tedana/workflows/shared/masking.py b/tedana/workflows/shared/masking.py new file mode 100644 index 000000000..73979f638 --- /dev/null +++ b/tedana/workflows/shared/masking.py @@ -0,0 +1,147 @@ +"""Masking utilities for tedana workflows. + +This module provides functions for creating adaptive masks. +""" + +import logging +from typing import Any, List, Optional + +import numpy as np +from nilearn.masking import compute_epi_mask + +from tedana import io, utils +from tedana.workflows.shared.containers import MaskData, MultiEchoData + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") + + +def create_adaptive_masks( + data: MultiEchoData, + mask_file: Optional[str], + masktype: List[str], + io_generator: Any, + t2smap_file: Optional[str] = None, + n_independent_echos: Optional[int] = None, +) -> MaskData: + """Create adaptive masks for denoising and classification. + + Creates both a liberal mask (for denoising, requiring at least 1 good echo) + and a conservative mask (for classification, requiring at least 3 good echoes). + + Parameters + ---------- + data : MultiEchoData + Loaded multi-echo data. + mask_file : str or None + Path to user-provided mask file, or None to compute from data. + masktype : list of str + Methods for adaptive mask generation ('dropout', 'decay', 'none'). + io_generator : OutputGenerator + Output generator for saving mask files. + t2smap_file : str, optional + Path to pre-computed T2* map for mask generation. + n_independent_echos : int, optional + Number of independent echoes for F-stat calculation. + + Returns + ------- + MaskData + Container with all mask arrays. + """ + t2s_limited = None + + if mask_file and not t2smap_file: + LGR.info("Using user-defined mask") + RepLGR.info("A user-defined mask was applied to the data.") + base_mask = utils.reshape_niimg(mask_file).astype(int) + elif t2smap_file and not mask_file: + LGR.info("Assuming user-defined T2* map is masked and using it to generate mask") + t2s_limited_sec = utils.reshape_niimg(t2smap_file) + t2s_limited = utils.sec2millisec(t2s_limited_sec) + base_mask = (t2s_limited != 0).astype(int) + elif t2smap_file and mask_file: + LGR.info("Combining user-defined mask and T2* map to generate mask") + t2s_limited_sec = utils.reshape_niimg(t2smap_file) + t2s_limited = utils.sec2millisec(t2s_limited_sec) + base_mask = utils.reshape_niimg(mask_file).astype(int) + base_mask[t2s_limited == 0] = 0 + else: + LGR.warning( + "Computing EPI mask from first echo using nilearn's compute_epi_mask function. " + "Most external pipelines include more reliable masking functions. " + "It is strongly recommended to provide an external mask, " + "and to visually confirm that mask accurately conforms to data boundaries." + ) + first_echo_img = io.new_nii_like(io_generator.reference_img, data.data_cat[:, 0, :]) + base_mask = compute_epi_mask(first_echo_img).get_fdata() + base_mask = utils.reshape_niimg(base_mask).astype(int) + RepLGR.info( + "An initial mask was generated from the first echo using " + "nilearn's compute_epi_mask function." 
+ ) + + # Create adaptive mask with at least 1 good echo, for denoising + mask_denoise, masksum_denoise = utils.make_adaptive_mask( + data.data_cat, + mask=base_mask, + n_independent_echos=n_independent_echos, + threshold=1, + methods=masktype, + ) + LGR.debug(f"Retaining {mask_denoise.sum()}/{data.n_samp} samples for denoising") + io_generator.save_file(masksum_denoise, "adaptive mask img") + + # Create adaptive mask with at least 3 good echoes, for classification + masksum_clf = masksum_denoise.copy() + masksum_clf[masksum_clf < 3] = 0 + mask_clf = masksum_clf.astype(bool) + RepLGR.info( + "A two-stage masking procedure was applied, in which a liberal mask " + "(including voxels with good data in at least the first echo) was used for " + "optimal combination, T2*/S0 estimation, and denoising, while a more conservative mask " + "(restricted to voxels with good data in at least the first three echoes) was used for " + "the component classification procedure." + ) + LGR.debug(f"Retaining {mask_clf.sum()}/{data.n_samp} samples for classification") + + return MaskData( + base_mask=base_mask, + mask_denoise=mask_denoise, + mask_clf=mask_clf, + masksum_denoise=masksum_denoise, + masksum_clf=masksum_clf, + ) + + +def create_simple_adaptive_mask( + data_cat: np.ndarray, + mask: Optional[np.ndarray], + masktype: List[str], + n_independent_echos: Optional[int] = None, +) -> tuple: + """Create a simple adaptive mask (for t2smap workflow). + + Parameters + ---------- + data_cat : np.ndarray + Multi-echo data array (S x E x T). + mask : np.ndarray or None + Initial mask or None to compute from data. + masktype : list of str + Methods for adaptive mask generation. + n_independent_echos : int, optional + Number of independent echoes. + + Returns + ------- + tuple of (np.ndarray, np.ndarray) + Tuple of (mask, masksum). + """ + return utils.make_adaptive_mask( + data_cat, + mask=mask, + n_independent_echos=n_independent_echos, + threshold=1, + methods=masktype, + ) diff --git a/tedana/workflows/shared/output.py b/tedana/workflows/shared/output.py new file mode 100644 index 000000000..fb74e47c4 --- /dev/null +++ b/tedana/workflows/shared/output.py @@ -0,0 +1,234 @@ +"""Output and result writing utilities for tedana workflows. + +This module provides functions for writing workflow outputs. +""" + +import json +import logging +from typing import Any, Dict, List + +import numpy as np +import pandas as pd + +import tedana.gscontrol as gsc +from tedana import __version__, io +from tedana.bibtex import get_description_references + +LGR = logging.getLogger("GENERAL") +RepLGR = logging.getLogger("REPORT") + + +def apply_tedort( + mixing: np.ndarray, + accepted_comps: List[int], + rejected_comps: List[int], +) -> np.ndarray: + """Orthogonalize rejected components with respect to accepted components. + + Parameters + ---------- + mixing : np.ndarray + Mixing matrix (T x C). + accepted_comps : list of int + Indices of accepted components. + rejected_comps : list of int + Indices of rejected components. + + Returns + ------- + np.ndarray + Modified mixing matrix with orthogonalized rejected components. + """ + acc_ts = mixing[:, accepted_comps] + rej_ts = mixing[:, rejected_comps] + betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0] + pred_rej_ts = np.dot(acc_ts, betas) + resid = rej_ts - pred_rej_ts + + mixing_orth = mixing.copy() + mixing_orth[:, rejected_comps] = resid + + RepLGR.info( + "Rejected components' time series were then " + "orthogonalized with respect to accepted components' time " + "series." 
+ ) + + return mixing_orth + + +def write_denoised_results( + data_optcom: np.ndarray, + mask: np.ndarray, + component_table: pd.DataFrame, + mixing: np.ndarray, + io_generator: Any, +) -> None: + """Write denoised timeseries and component results. + + Parameters + ---------- + data_optcom : np.ndarray + Optimally combined data (S x T). + mask : np.ndarray + Mask for denoising. + component_table : pd.DataFrame + Component metrics table. + mixing : np.ndarray + Mixing matrix (T x C). + io_generator : OutputGenerator + Output generator for saving files. + """ + io.writeresults( + data_optcom, + mask=mask, + component_table=component_table, + mixing=mixing, + io_generator=io_generator, + ) + + +def apply_mir( + data_optcom: np.ndarray, + mixing: np.ndarray, + mask: np.ndarray, + component_table: pd.DataFrame, + classification_tags: List[str], + io_generator: Any, +) -> None: + """Apply minimum image regression. + + Parameters + ---------- + data_optcom : np.ndarray + Optimally combined data (S x T). + mixing : np.ndarray + Mixing matrix (T x C). + mask : np.ndarray + Mask array. + component_table : pd.DataFrame + Component metrics table. + classification_tags : list of str + Classification tags from selector. + io_generator : OutputGenerator + Output generator for saving files. + """ + gsc.minimum_image_regression( + data_optcom=data_optcom, + mixing=mixing, + mask=mask, + component_table=component_table, + classification_tags=classification_tags, + io_generator=io_generator, + ) + + +def write_echo_results( + data_cat: np.ndarray, + mixing: np.ndarray, + mask: np.ndarray, + component_table: pd.DataFrame, + io_generator: Any, +) -> None: + """Write per-echo results (verbose mode). + + Parameters + ---------- + data_cat : np.ndarray + Multi-echo data (S x E x T). + mixing : np.ndarray + Mixing matrix (T x C). + mask : np.ndarray + Mask array. + component_table : pd.DataFrame + Component metrics table. + io_generator : OutputGenerator + Output generator for saving files. + """ + io.writeresults_echoes(data_cat, mixing, mask, component_table, io_generator) + + +def save_derivative_metadata( + io_generator: Any, + info_dict: Dict[str, Any], + workflow_name: str, + workflow_description: str, +) -> None: + """Save BIDS-compatible derivative metadata. + + Parameters + ---------- + io_generator : OutputGenerator + Output generator for saving files. + info_dict : dict + System and command info dictionary. + workflow_name : str + Name of the workflow (e.g., 'tedana', 't2smap', 'ica_reclassify'). + workflow_description : str + Description of the workflow. + """ + derivative_metadata = { + "Name": f"{workflow_name} Outputs", + "BIDSVersion": "1.5.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": workflow_name, + "Version": __version__, + "Description": workflow_description, + "CodeURL": "https://github.com/ME-ICA/tedana", + "Node": { + "Name": info_dict["Node"], + "System": info_dict["System"], + "Machine": info_dict["Machine"], + "Processor": info_dict["Processor"], + "Release": info_dict["Release"], + "Version": info_dict["Version"], + }, + "Python": info_dict["Python"], + "Python_Libraries": info_dict["Python_Libraries"], + "Command": info_dict["Command"], + } + ], + } + with open(io_generator.get_name("data description json"), "w") as fo: + json.dump(derivative_metadata, fo, sort_keys=True, indent=4) + + +def finalize_report_text(repname: str, bibtex_file: str) -> None: + """Finalize report text and write BibTeX references. 
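+
+    The report file is rewritten in place: its lines are joined, with
+    double-spaced breaks treated as paragraph breaks, and citation keys
+    found in the text are resolved into BibTeX entries written to
+    ``bibtex_file``.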
+ + Parameters + ---------- + repname : str + Path to report text file. + bibtex_file : str + Path to BibTeX output file. + """ + RepLGR.info( + "\n\nThis workflow used numpy \\citep{van2011numpy}, scipy \\citep{virtanen2020scipy}, " + "pandas \\citep{mckinney2010data,reback2020pandas}, " + "scikit-learn \\citep{pedregosa2011scikit}, " + "nilearn, bokeh \\citep{bokehmanual}, matplotlib \\citep{Hunter2007}, " + "and nibabel \\citep{brett_matthew_2019_3233118}." + ) + + RepLGR.info( + "This workflow also used the Dice similarity index " + "\\citep{dice1945measures,sorensen1948method}." + ) + + with open(repname) as fo: + report = [line.rstrip() for line in fo.readlines()] + report = " ".join(report) + # Double-spaces reflect new paragraphs + report = report.replace(" ", "\n\n") + + with open(repname, "w") as fo: + fo.write(report) + + # Collect BibTeX entries + references = get_description_references(report) + + with open(bibtex_file, "w") as fo: + fo.write(references) diff --git a/tedana/workflows/shared/reporting.py b/tedana/workflows/shared/reporting.py new file mode 100644 index 000000000..b8ae90cae --- /dev/null +++ b/tedana/workflows/shared/reporting.py @@ -0,0 +1,204 @@ +"""Report generation utilities for tedana workflows. + +This module provides functions for generating HTML reports and static figures. +""" + +import logging +import os +from typing import Any, List, Optional + +import numpy as np +import pandas as pd + +from tedana import io, reporting + +LGR = logging.getLogger("GENERAL") + + +def generate_static_figures( + data_optcom: np.ndarray, + mask_denoise: np.ndarray, + base_mask: np.ndarray, + component_table: pd.DataFrame, + mixing: np.ndarray, + io_generator: Any, + png_cmap: str, + gscontrol: List[str], + masksum_denoise: Optional[np.ndarray] = None, + external_regressors: Optional[pd.DataFrame] = None, + t2smap_provided: bool = False, +) -> None: + """Generate static figures for the HTML report. + + Parameters + ---------- + data_optcom : np.ndarray + Optimally combined data (S x T). + mask_denoise : np.ndarray + Denoising mask. + base_mask : np.ndarray + Base mask for adaptive mask plot. + component_table : pd.DataFrame + Component metrics table. + mixing : np.ndarray + Mixing matrix (T x C) - original, not tedort-modified. + io_generator : OutputGenerator + Output generator for saving files. + png_cmap : str + Colormap for figures. + gscontrol : list of str + Global signal control methods used. + masksum_denoise : np.ndarray, optional + Masksum for RMSE plot. + external_regressors : pd.DataFrame, optional + External regressors for correlation heatmap. + t2smap_provided : bool, optional + Whether T2* map was provided (skip RMSE plot if True). 
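+
+    Examples
+    --------
+    Sketch of a full-workflow call; the inputs are assumed to come from
+    the masking, combination, and selection stages, with ``mixing_orig``
+    the pre-tedort mixing matrix (optional arguments fall back to their
+    defaults):
+
+    >>> generate_static_figures(
+    ...     data_optcom=data_optcom,
+    ...     mask_denoise=masks.mask_denoise,
+    ...     base_mask=masks.base_mask,
+    ...     component_table=component_table,
+    ...     mixing=mixing_orig,
+    ...     io_generator=io_generator,
+    ...     png_cmap="coolwarm",
+    ...     gscontrol=[],
+    ...     masksum_denoise=masks.masksum_denoise,
+    ... )  # doctest: +SKIP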
+ """ + LGR.info("Making figures folder with static component maps and timecourse plots.") + + # Generate denoised timeseries for carpet plot + data_denoised, data_accepted, data_rejected = io.denoise_ts( + data_optcom, + mixing, + mask_denoise, + component_table, + ) + + # Adaptive mask plot + reporting.static_figures.plot_adaptive_mask( + optcom=data_optcom, + base_mask=base_mask, + io_generator=io_generator, + ) + + # Carpet plot + reporting.static_figures.carpet_plot( + optcom_ts=data_optcom, + denoised_ts=data_denoised, + hikts=data_accepted, + lowkts=data_rejected, + mask=mask_denoise, + io_generator=io_generator, + gscontrol=gscontrol, + ) + + # Component figures + reporting.static_figures.comp_figures( + data_optcom, + mask=mask_denoise, + component_table=component_table, + mixing=mixing, + io_generator=io_generator, + png_cmap=png_cmap, + ) + + # T2* and S0 plots + reporting.static_figures.plot_t2star_and_s0(io_generator=io_generator, mask=mask_denoise) + + # RMSE plot (only if T2* map was computed, not provided) + if not t2smap_provided and masksum_denoise is not None: + reporting.static_figures.plot_rmse( + io_generator=io_generator, + adaptive_mask=masksum_denoise, + ) + + # Global signal control plots + if gscontrol: + reporting.static_figures.plot_gscontrol( + io_generator=io_generator, + gscontrol=gscontrol, + ) + + # External regressors correlation heatmap + if external_regressors is not None: + comp_names = component_table["Component"].values + mixing_df = pd.DataFrame(data=mixing, columns=comp_names) + reporting.static_figures.plot_heatmap( + mixing=mixing_df, + external_regressors=external_regressors, + component_table=component_table, + out_file=os.path.join( + io_generator.out_dir, + "figures", + f"{io_generator.prefix}confound_correlations.svg", + ), + ) + + +def generate_dynamic_report( + io_generator: Any, + cluster_labels: Optional[np.ndarray] = None, + similarity_t_sne: Optional[np.ndarray] = None, +) -> None: + """Generate interactive HTML report. + + Parameters + ---------- + io_generator : OutputGenerator + Output generator for saving files. + cluster_labels : np.ndarray, optional + Cluster labels from robustica. + similarity_t_sne : np.ndarray, optional + t-SNE similarity from robustica. + """ + LGR.info("Generating dynamic report") + reporting.generate_report(io_generator, cluster_labels, similarity_t_sne) + + +def generate_reclassify_figures( + data_optcom: np.ndarray, + mask_denoise: np.ndarray, + component_table: pd.DataFrame, + mixing: np.ndarray, + io_generator: Any, + png_cmap: str, + gscontrol: List[str], +) -> None: + """Generate figures for ica_reclassify workflow. + + This is a simpler version that doesn't include all the plots + from the full tedana workflow. + + Parameters + ---------- + data_optcom : np.ndarray + Optimally combined data (S x T). + mask_denoise : np.ndarray + Denoising mask. + component_table : pd.DataFrame + Component metrics table. + mixing : np.ndarray + Mixing matrix (T x C). + io_generator : OutputGenerator + Output generator for saving files. + png_cmap : str + Colormap for figures. + gscontrol : list of str + Global signal control methods used. 
+ """ + LGR.info("Making figures folder with static component maps and timecourse plots.") + + # Generate denoised timeseries for carpet plot + dn_ts, hikts, lowkts = io.denoise_ts(data_optcom, mixing, mask_denoise, component_table) + + # Carpet plot + reporting.static_figures.carpet_plot( + optcom_ts=data_optcom, + denoised_ts=dn_ts, + hikts=hikts, + lowkts=lowkts, + mask=mask_denoise, + io_generator=io_generator, + gscontrol=gscontrol, + ) + + # Component figures + reporting.static_figures.comp_figures( + data_optcom, + mask=mask_denoise, + component_table=component_table, + mixing=mixing, + io_generator=io_generator, + png_cmap=png_cmap, + ) diff --git a/tedana/workflows/shared/setup.py b/tedana/workflows/shared/setup.py new file mode 100644 index 000000000..9fd6e6c47 --- /dev/null +++ b/tedana/workflows/shared/setup.py @@ -0,0 +1,128 @@ +"""Setup and teardown utilities for tedana workflows. + +This module provides common setup and teardown functions used by +all tedana workflows. +""" + +import datetime +import logging +import os +import os.path as op +from glob import glob +from typing import Tuple + +from tedana import io, utils + +LGR = logging.getLogger("GENERAL") + + +def setup_output_directory(out_dir: str) -> str: + """Create and return absolute path to output directory. + + Parameters + ---------- + out_dir : str + Path to output directory. + + Returns + ------- + str + Absolute path to output directory. + """ + out_dir = op.abspath(out_dir) + if not op.isdir(out_dir): + os.mkdir(out_dir) + return out_dir + + +def rename_previous_reports(out_dir: str, prefix: str) -> Tuple[str, str]: + """Rename any previous report files and return new report paths. + + Parameters + ---------- + out_dir : str + Output directory path. + prefix : str + Prefix for filenames. + + Returns + ------- + tuple of (str, str) + Tuple containing (repname, bibtex_file) paths. + """ + prefix = io._infer_prefix(prefix) + basename = f"{prefix}report" + extension = "txt" + repname = op.join(out_dir, f"{basename}.{extension}") + bibtex_file = op.join(out_dir, f"{prefix}references.bib") + + # Rename previous report files + repex = op.join(out_dir, f"{basename}*") + previousreps = glob(repex) + previousreps.sort(reverse=True) + for f in previousreps: + previousparts = op.splitext(f) + newname = previousparts[0] + "_old" + previousparts[1] + os.rename(f, newname) + + return repname, bibtex_file + + +def setup_logging( + out_dir: str, + repname: str, + quiet: bool = False, + debug: bool = False, +) -> str: + """Set up logging for the workflow. + + Parameters + ---------- + out_dir : str + Output directory path. + repname : str + Report filename for the report logger. + quiet : bool, optional + Whether to suppress logging output. Default is False. + debug : bool, optional + Whether to enable debug logging. Default is False. + + Returns + ------- + str + Path to the log file. + """ + start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") + logname = op.join(out_dir, f"tedana_{start_time}.tsv") + utils.setup_loggers(logname, repname, quiet=quiet, debug=debug) + return logname + + +def save_workflow_command( + out_dir: str, + command: str, + filename: str = "tedana_call.sh", +) -> None: + """Save the workflow command to a shell script file. + + Parameters + ---------- + out_dir : str + Output directory path. + command : str + Command string to save. + filename : str, optional + Name of the output file. Default is "tedana_call.sh". 
+ """ + with open(os.path.join(out_dir, filename), "w") as f: + f.write(command) + + +def teardown_workflow() -> None: + """Clean up after workflow completion. + + Logs completion message, newsletter info, and tears down loggers. + """ + LGR.info("Workflow completed") + utils.log_newsletter_info() + utils.teardown_loggers() diff --git a/tedana/workflows/t2smap.py b/tedana/workflows/t2smap.py index 81260f9d0..d5a3d75ca 100644 --- a/tedana/workflows/t2smap.py +++ b/tedana/workflows/t2smap.py @@ -11,9 +11,17 @@ from scipy import stats from threadpoolctl import threadpool_limits -from tedana import __version__, combine, decay, io, utils +from tedana import decay, io, utils from tedana.utils import parse_volume_indices from tedana.workflows.parser_utils import is_valid_file +from tedana.workflows.shared import ( + compute_optimal_combination_simple, + create_simple_adaptive_mask, + fit_decay_model_simple, + save_derivative_metadata, + save_workflow_command, + teardown_workflow, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -336,9 +344,7 @@ def t2smap_workflow( # Save command into sh file, if the command-line interface was used if t2smap_command is not None: - command_file = open(os.path.join(out_dir, "t2smap_call.sh"), "w") - command_file.write(t2smap_command) - command_file.close() + save_workflow_command(out_dir, t2smap_command, "t2smap_call.sh") else: # Get variables passed to function if the tedana command is None variables = ", ".join(f"{name}={value}" for name, value in locals().items()) @@ -414,23 +420,15 @@ def t2smap_workflow( else: data_without_excluded_vols = data_cat - mask, masksum = utils.make_adaptive_mask( + mask, masksum = create_simple_adaptive_mask( data_without_excluded_vols, mask=mask, + masktype=masktype, n_independent_echos=n_independent_echos, - threshold=1, - methods=masktype, ) - LGR.info("Computing adaptive T2* map") - decay_function = decay.fit_decay if fitmode == "all" else decay.fit_decay_ts - (t2s_limited, s0_limited, t2s_full, s0_full) = decay_function( - data=data_without_excluded_vols, - tes=tes, - mask=mask, - adaptive_mask=masksum, - fittype=fittype, - n_threads=n_threads, + (t2s_limited, s0_limited, t2s_full, s0_full) = fit_decay_model_simple( + data_without_excluded_vols, tes, mask, masksum, fittype, fitmode, n_threads ) # Delete unused variable del data_without_excluded_vols @@ -456,7 +454,7 @@ def t2smap_workflow( LGR.info("Computing optimal combination") # optimally combine data, including the ignored volumes - data_optcom = combine.make_optcom(data_cat, tes, masksum, t2s=t2s_full, combmode=combmode) + data_optcom = compute_optimal_combination_simple(data_cat, tes, masksum, t2s_full, combmode) # clean up numerical errors for arr in (data_optcom, s0_full, t2s_full): @@ -481,43 +479,20 @@ def t2smap_workflow( io_generator.save_file(data_optcom, "combined img") # Write out BIDS-compatible description file - derivative_metadata = { - "Name": "t2smap Outputs", - "BIDSVersion": "1.5.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "t2smap", - "Version": __version__, - "Description": ( - "A pipeline estimating T2* from multi-echo fMRI data and " - "combining data across echoes." 
- ), - "CodeURL": "https://github.com/ME-ICA/tedana", - "Node": { - "Name": info_dict["Node"], - "System": info_dict["System"], - "Machine": info_dict["Machine"], - "Processor": info_dict["Processor"], - "Release": info_dict["Release"], - "Version": info_dict["Version"], - }, - "Python": info_dict["Python"], - "Python_Libraries": info_dict["Python_Libraries"], - "Command": info_dict["Command"], - } - ], - } - - io_generator.save_file(derivative_metadata, "data description json") + save_derivative_metadata( + io_generator, + info_dict, + workflow_name="t2smap", + workflow_description=( + "A pipeline estimating T2* from multi-echo fMRI data and " + "combining data across echoes." + ), + ) io_generator.save_self() LGR.info("Workflow completed") - # Add newsletter info to the log - utils.log_newsletter_info() - - utils.teardown_loggers() + teardown_workflow() def _main(argv=None): diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index a479cad38..66f8e3f8e 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -1,34 +1,17 @@ """Run the "canonical" TE-Dependent ANAlysis workflow.""" import argparse -import datetime -import json import logging import os import os.path as op import shutil import sys -from glob import glob +from typing import Any, Dict, List, Optional, Union -import numpy as np import pandas as pd -from nilearn.masking import compute_epi_mask -from scipy import stats from threadpoolctl import threadpool_limits -import tedana.gscontrol as gsc -from tedana import ( - __version__, - combine, - decay, - decomposition, - io, - metrics, - reporting, - selection, - utils, -) -from tedana.bibtex import get_description_references +from tedana import decomposition, io, metrics, reporting, selection, utils from tedana.config import ( DEFAULT_ICA_METHOD, DEFAULT_N_MAX_ITER, @@ -43,6 +26,25 @@ check_tedpca_value, is_valid_file, ) +from tedana.workflows.shared import ( + apply_mir, + apply_tedort, + compute_optimal_combination, + create_adaptive_masks, + finalize_report_text, + fit_decay_model, + generate_dynamic_report, + generate_static_figures, + load_multiecho_data, + rename_previous_reports, + save_derivative_metadata, + save_workflow_command, + setup_logging, + teardown_workflow, + validate_tr, + write_denoised_results, + write_echo_results, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -400,39 +402,39 @@ def _get_parser(): def tedana_workflow( - data, - tes, - out_dir=".", - mask=None, - convention="bids", - prefix="", - dummy_scans=0, - masktype=["dropout"], - fittype="loglin", - combmode="t2s", - n_independent_echos=None, - tree="tedana_orig", - external_regressors=None, - ica_method=DEFAULT_ICA_METHOD, - n_robust_runs=DEFAULT_N_ROBUST_RUNS, - tedpca="aic", - fixed_seed=DEFAULT_SEED, - maxit=DEFAULT_N_MAX_ITER, - maxrestart=DEFAULT_N_MAX_RESTART, - tedort=False, - gscontrol=None, - no_reports=False, - png_cmap="coolwarm", - verbose=False, - low_mem=False, - debug=False, - quiet=False, - overwrite=False, - t2smap=None, - mixing_file=None, - n_threads=1, - tedana_command=None, -): + data: Union[str, List[str]], + tes: List[float], + out_dir: str = ".", + mask: Optional[str] = None, + convention: str = "bids", + prefix: str = "", + dummy_scans: int = 0, + masktype: Optional[List[str]] = None, + fittype: str = "loglin", + combmode: str = "t2s", + n_independent_echos: Optional[int] = None, + tree: str = "tedana_orig", + external_regressors: Optional[str] = None, + ica_method: str = DEFAULT_ICA_METHOD, + n_robust_runs: 
int = DEFAULT_N_ROBUST_RUNS, + tedpca: Any = "aic", + fixed_seed: int = DEFAULT_SEED, + maxit: int = DEFAULT_N_MAX_ITER, + maxrestart: int = DEFAULT_N_MAX_RESTART, + tedort: bool = False, + gscontrol: Optional[Union[str, List[str]]] = None, + no_reports: bool = False, + png_cmap: str = "coolwarm", + verbose: bool = False, + low_mem: bool = False, + debug: bool = False, + quiet: bool = False, + overwrite: bool = False, + t2smap: Optional[str] = None, + mixing_file: Optional[str] = None, + n_threads: int = 1, + tedana_command: Optional[str] = None, +) -> Dict[str, Any]: """Run the "canonical" TE-Dependent ANAlysis workflow. Please remember to cite :footcite:t:`dupre2021te`. @@ -554,7 +556,7 @@ def tedana_workflow( If True, suppresses logging/printing of messages. Default is False. overwrite : :obj:`bool`, optional If True, force overwriting of files. Default is False. - n_threads : :obj:`int` or None, optional + n_threads : :obj:`int`, optional Number of threads to use. Used by threadpoolctl to set the parameter outside of the workflow function, as well as the number of threads to use for the decay model fitting. Default is 1. @@ -562,6 +564,15 @@ def tedana_workflow( If the command-line interface was used, this is the command that was run. Default is None. + Returns + ------- + results : dict + Dictionary containing workflow results including: + - 'component_table': Component metrics table + - 'selector': Component selector with classification results + - 'mixing': Mixing matrix + - 'n_accepted': Number of accepted components + Notes ----- This workflow writes out several files. For a complete list of the files @@ -571,88 +582,87 @@ def tedana_workflow( References ---------- .. footbibliography:: + + + Examples + -------- + Basic usage with three echoes: + + >>> results = tedana_workflow( + ... data=['echo1.nii.gz', 'echo2.nii.gz', 'echo3.nii.gz'], + ... tes=[14.5, 29.0, 43.5], + ... out_dir='tedana_output' + ... ) + + Access results from the returned dictionary: + + >>> component_table = results['component_table'] + >>> mixing_matrix = results['mixing'] + >>> n_accepted = results['n_accepted'] """ + # =========================================================================== + # Stage 1: Setup and Initialization + # =========================================================================== + # Set defaults + if masktype is None: + masktype = ["dropout"] + + if gscontrol is None: + gscontrol = [] + elif not isinstance(gscontrol, list): + gscontrol = [gscontrol] + + # Ensure tes are in appropriate format + tes = [float(te) for te in tes] + tes = utils.check_te_values(tes) + n_echos = len(tes) + + # Check tedpca value + tedpca = check_tedpca_value(tedpca, is_parser=False) + + # For z-catted files, make sure data is a list + if isinstance(data, str): + data = [data] + + # Setup output directory out_dir = op.abspath(out_dir) if not op.isdir(out_dir): os.mkdir(out_dir) - # boilerplate + # Setup boilerplate prefix = io._infer_prefix(prefix) - basename = f"{prefix}report" - extension = "txt" - repname = op.join(out_dir, (basename + "." 
+ extension))
-    bibtex_file = op.join(out_dir, f"{prefix}references.bib")
-    repex = op.join(out_dir, (basename + "*"))
-    previousreps = glob(repex)
-    previousreps.sort(reverse=True)
-    for f in previousreps:
-        previousparts = op.splitext(f)
-        newname = previousparts[0] + "_old" + previousparts[1]
-        os.rename(f, newname)
-
-    # create logfile name
-    basename = "tedana_"
-    extension = "tsv"
-    start_time = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S")
-    logname = op.join(out_dir, (basename + start_time + "." + extension))
-    utils.setup_loggers(logname, repname, quiet=quiet, debug=debug)
-
-    # Save command into sh file, if the command-line interface was used
-    # TODO: use io_generator to save command
+    repname, bibtex_file = rename_previous_reports(out_dir, prefix)
+    setup_logging(out_dir, repname, quiet, debug)
+
+    # Save command
     if tedana_command is not None:
-        command_file = open(os.path.join(out_dir, "tedana_call.sh"), "w")
-        command_file.write(tedana_command)
-        command_file.close()
+        save_workflow_command(out_dir, tedana_command, "tedana_call.sh")
     else:
-        # Get variables passed to function if the tedana command is None
+        # Generate command from arguments
         variables = ", ".join(f"{name}={value}" for name, value in locals().items())
-        # From variables, remove everything after ", tedana_command"
         variables = variables.split(", tedana_command")[0]
         tedana_command = f"tedana_workflow({variables})"
 
-    LGR.info(f"Using output directory: {out_dir}")
-
-    # ensure tes are in appropriate format
-    tes = [float(te) for te in tes]
-    tes = utils.check_te_values(tes)
-    n_echos = len(tes)
-
-    # Coerce gscontrol to list
-    if not isinstance(gscontrol, list):
-        gscontrol = [gscontrol]
-
-    # Check value of tedpca *if* it is a predefined string,
-    # a float in (0.0, 1.0) or an int >= 1
-    tedpca = check_tedpca_value(tedpca, is_parser=False)
+    # Save system info
+    info_dict = utils.get_system_version_info()
+    info_dict["Command"] = tedana_command
 
-    # For z-catted files, make sure it's a list of size 1
-    if isinstance(data, str):
-        data = [data]
+    LGR.info(f"Using output directory: {out_dir}")
 
+    # Initialize component selector
     LGR.info("Initializing and validating component selection tree")
     selector = ComponentSelector(tree, out_dir)
 
+    # ===========================================================================
+    # Stage 2: Data Loading
+    # ===========================================================================
     LGR.info(f"Loading input data: {[f for f in data]}")
-    data_cat, ref_img = io.load_data(data, n_echos=n_echos, dummy_scans=dummy_scans)
-
-    # Load external regressors if provided
-    # Decided to do the validation here so that, if there are issues, an error
-    # will be raised before PCA/ICA
-    if (
-        "external_regressor_config" in set(selector.tree.keys())
-        and selector.tree["external_regressor_config"] is not None
-    ):
-        external_regressors, selector.tree["external_regressor_config"] = (
-            metrics.external.load_validate_external_regressors(
-                external_regressors=external_regressors,
-                external_regressor_config=selector.tree["external_regressor_config"],
-                n_vols=data_cat.shape[2],
-                dummy_scans=dummy_scans,
-            )
-        )
+    me_data = load_multiecho_data(data, tes, dummy_scans)
+    n_vols = me_data.n_vols
 
+    # Setup IO generator
     io_generator = io.OutputGenerator(
-        ref_img,
+        me_data.ref_img,
         convention=convention,
         out_dir=out_dir,
         prefix=prefix,
@@ -660,34 +670,29 @@ def tedana_workflow(
         overwrite=overwrite,
         verbose=verbose,
     )
-
-    # Record inputs to
OutputGenerator - # TODO: turn this into an IOManager since this isn't really output io_generator.register_input(data) - # Save system info to json - info_dict = utils.get_system_version_info() - info_dict["Command"] = tedana_command + # Validate TR + validate_tr(me_data.ref_img) - n_samp, n_echos, n_vols = data_cat.shape - LGR.debug(f"Resulting data shape: {data_cat.shape}") - - # check if TR is 0 - img_t_r = io_generator.reference_img.header.get_zooms()[-1] - if img_t_r == 0: - raise OSError( - "Dataset has a TR of 0. This indicates incorrect" - " header information. To correct this, we recommend" - " using this snippet:" - "\n" - "https://gist.github.com/jbteves/032c87aeb080dd8de8861cb151bff5d6" - "\n" - "to correct your TR to the value it should be." + # Load external regressors if provided + external_regressors_data = None + if ( + "external_regressor_config" in set(selector.tree.keys()) + and selector.tree["external_regressor_config"] is not None + ): + external_regressors_data, selector.tree["external_regressor_config"] = ( + metrics.external.load_validate_external_regressors( + external_regressors=external_regressors, + external_regressor_config=selector.tree["external_regressor_config"], + n_vols=me_data.data_cat.shape[2], + dummy_scans=dummy_scans, + ) ) + # Handle pre-computed mixing file if mixing_file is not None and op.isfile(mixing_file): mixing_file = op.abspath(mixing_file) - # Allow users to re-run on same folder mixing_name_output = io_generator.get_name("ICA mixing tsv") mixing_file_new_path = op.join(io_generator.out_dir, op.basename(mixing_file)) if op.basename(mixing_file) != op.basename(mixing_name_output) and not op.isfile( @@ -695,8 +700,6 @@ def tedana_workflow( ): shutil.copyfile(mixing_file, mixing_file_new_path) else: - # Add "user_provided" to the mixing file's name if it's identical to the new file name - # or if there's already a file in the output directory with the same name shutil.copyfile( mixing_file, op.join(io_generator.out_dir, f"user_provided_{op.basename(mixing_file)}"), @@ -704,12 +707,13 @@ def tedana_workflow( elif mixing_file is not None: raise OSError("Argument '--mix' must be an existing file.") + # Handle pre-computed T2* map + t2smap_provided = t2smap is not None if t2smap is not None and op.isfile(t2smap): - t2smap_file = io_generator.get_name("t2star img") + t2smap_output = io_generator.get_name("t2star img") t2smap = op.abspath(t2smap) - # Allow users to re-run on same folder - if t2smap != t2smap_file: - shutil.copyfile(t2smap, t2smap_file) + if t2smap != t2smap_output: + shutil.copyfile(t2smap, t2smap_output) elif t2smap is not None: raise OSError("Argument 't2smap' must be an existing file.") @@ -718,131 +722,63 @@ def tedana_workflow( "\\citep{dupre2021te}." 
) - if mask and not t2smap: - # TODO: add affine check - LGR.info("Using user-defined mask") - RepLGR.info("A user-defined mask was applied to the data.") - mask = utils.reshape_niimg(mask).astype(int) - elif t2smap and not mask: - LGR.info("Assuming user=defined T2* map is masked and using it to generate mask") - t2s_limited_sec = utils.reshape_niimg(t2smap) - t2s_limited = utils.sec2millisec(t2s_limited_sec) - t2s_full = t2s_limited.copy() - mask = (t2s_limited != 0).astype(int) - elif t2smap and mask: - LGR.info("Combining user-defined mask and T2* map to generate mask") - t2s_limited_sec = utils.reshape_niimg(t2smap) - t2s_limited = utils.sec2millisec(t2s_limited_sec) - t2s_full = t2s_limited.copy() - mask = utils.reshape_niimg(mask).astype(int) - mask[t2s_limited == 0] = 0 # reduce mask based on T2* map - else: - LGR.warning( - "Computing EPI mask from first echo using nilearn's compute_epi_mask function. " - "Most external pipelines include more reliable masking functions. " - "It is strongly recommended to provide an external mask, " - "and to visually confirm that mask accurately conforms to data boundaries." - ) - first_echo_img = io.new_nii_like(io_generator.reference_img, data_cat[:, 0, :]) - mask = compute_epi_mask(first_echo_img).get_fdata() - mask = utils.reshape_niimg(mask).astype(int) - RepLGR.info( - "An initial mask was generated from the first echo using " - "nilearn's compute_epi_mask function." - ) - - # Create an adaptive mask with at least 1 good echo, for denoising - mask_denoise, masksum_denoise = utils.make_adaptive_mask( - data_cat, - mask=mask, + # =========================================================================== + # Stage 3: Masking + # =========================================================================== + masks = create_adaptive_masks( + me_data, + mask_file=mask, + masktype=masktype, + io_generator=io_generator, + t2smap_file=t2smap, n_independent_echos=n_independent_echos, - threshold=1, - methods=masktype, ) - LGR.debug(f"Retaining {mask_denoise.sum()}/{n_samp} samples for denoising") - io_generator.save_file(masksum_denoise, "adaptive mask img") - # Create an adaptive mask with at least 3 good echoes, for classification - masksum_clf = masksum_denoise.copy() - masksum_clf[masksum_clf < 3] = 0 - mask_clf = masksum_clf.astype(bool) - RepLGR.info( - "A two-stage masking procedure was applied, in which a liberal mask " - "(including voxels with good data in at least the first echo) was used for " - "optimal combination, T2*/S0 estimation, and denoising, while a more conservative mask " - "(restricted to voxels with good data in at least the first three echoes) was used for " - "the component classification procedure." 
-    )
-    LGR.debug(f"Retaining {mask_clf.sum()}/{n_samp} samples for classification")
-
-    if t2smap is None:
-        LGR.info("Computing T2* map")
-        t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay(
-            data=data_cat,
-            tes=tes,
-            mask=mask_denoise,
-            adaptive_mask=masksum_denoise,
-            fittype=fittype,
+    # ===========================================================================
+    # Stage 4: T2*/S0 Estimation
+    # ===========================================================================
+    if not t2smap_provided:
+        decay_maps = fit_decay_model(
+            me_data,
+            masks,
+            fittype,
+            io_generator,
+            verbose=verbose,
             n_threads=n_threads,
         )
-
-        # set a hard cap for the T2* map
-        # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
-        cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method="lower")
-        LGR.debug(f"Setting cap on T2* map at {utils.millisec2sec(cap_t2s):.5f}s")
-        t2s_full[t2s_full > cap_t2s * 10] = cap_t2s
-        io_generator.save_file(utils.millisec2sec(t2s_full), "t2star img")
-        io_generator.save_file(s0_full, "s0 img")
-
-        if verbose:
-            io_generator.save_file(utils.millisec2sec(t2s_limited), "limited t2star img")
-            io_generator.save_file(s0_limited, "limited s0 img")
-
-        # Calculate RMSE if S0 and T2* are fit
-        rmse_map, rmse_df = decay.rmse_of_fit_decay_ts(
-            data=data_cat,
-            tes=tes,
-            adaptive_mask=masksum_denoise,
-            t2s=t2s_limited,
-            s0=s0_limited,
-            fitmode="all",
-        )
-        io_generator.save_file(rmse_map, "rmse img")
-        io_generator.add_df_to_file(rmse_df, "confounds tsv")
-
-    # optimally combine data
-    data_optcom = combine.make_optcom(
-        data_cat,
-        tes,
-        masksum_denoise,
-        t2s=t2s_full,
-        combmode=combmode,
+    else:
+        # T2* map was provided: load it and wrap it in a stand-in exposing t2s_full
+        t2s_limited_sec = utils.reshape_niimg(t2smap)
+        t2s_full = utils.sec2millisec(t2s_limited_sec)
+        decay_maps = type("DecayMaps", (), {"t2s_full": t2s_full})()
+
+    # ===========================================================================
+    # Stage 5: Optimal Combination
+    # ===========================================================================
+    optcom = compute_optimal_combination(
+        me_data,
+        masks,
+        decay_maps,
+        combmode,
+        io_generator,
+        gscontrol,
     )
+    data_optcom = optcom.data_optcom
 
-    if "gsr" in gscontrol:
-        # regress out global signal
-        data_cat, data_optcom = gsc.gscontrol_raw(
-            data_cat=data_cat,
-            data_optcom=data_optcom,
-            n_echos=n_echos,
-            io_generator=io_generator,
-        )
-
-    fout = io_generator.save_file(data_optcom, "combined img")
-    LGR.info(f"Writing optimally combined data set: {fout}")
-
-    # Default r_ica results to None as they are expected for the reports
+    # ===========================================================================
+    # Stage 6: Decomposition (PCA + ICA)
+    # ===========================================================================
     cluster_labels = None
     similarity_t_sne = None
     fastica_convergence_warning_count = None
     if mixing_file is None:
-        # Identify and remove thermal noise from data
+        # PCA decomposition
         data_reduced, n_components = decomposition.tedpca(
-            data_cat,
+            me_data.data_cat,
             data_optcom,
-            mask_clf,
-            masksum_clf,
+            masks.mask_clf,
+            masks.masksum_clf,
             io_generator,
             tes=tes,
             n_independent_echos=n_independent_echos,
@@ -851,11 +787,11 @@ def tedana_workflow(
             rdaw=1.0,
             low_mem=low_mem,
         )
+
         if verbose:
-            io_generator.save_file(utils.unmask(data_reduced, mask_clf), "whitened img")
+            io_generator.save_file(utils.unmask(data_reduced, masks.mask_clf), "whitened img")
 
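Likewise, create_adaptive_masks (Stage 3) is expected to return an object exposing base_mask, mask_denoise, masksum_denoise, mask_clf, and masksum_clf, mirroring the two-stage masking block removed above, and fit_decay_model (Stage 4) to return the fitted decay maps. A minimal sketch of the latter follows, assuming me_data carries data_cat and tes and leaving out the RMSE computation that the removed block also performed; the actual implementation in tedana/workflows/shared.py may differ.

from dataclasses import dataclass

import numpy as np
from scipy import stats

from tedana import decay, utils


@dataclass
class DecayMaps:
    """Hypothetical container for the decay-model fits consumed above."""

    t2s_limited: np.ndarray
    s0_limited: np.ndarray
    t2s_full: np.ndarray
    s0_full: np.ndarray


def fit_decay_model(me_data, masks, fittype, io_generator, verbose=False, n_threads=1):
    """Fit the monoexponential decay model; sketch of the inline block it replaces."""
    t2s_limited, s0_limited, t2s_full, s0_full = decay.fit_decay(
        data=me_data.data_cat,
        tes=me_data.tes,  # assumes load_multiecho_data stores the echo times
        mask=masks.mask_denoise,
        adaptive_mask=masks.masksum_denoise,
        fittype=fittype,
        n_threads=n_threads,
    )
    # Hard cap on T2*: anything above 10x the 99.5th percentile is reset to it
    cap_t2s = stats.scoreatpercentile(t2s_full.flatten(), 99.5, interpolation_method="lower")
    t2s_full[t2s_full > cap_t2s * 10] = cap_t2s
    io_generator.save_file(utils.millisec2sec(t2s_full), "t2star img")
    io_generator.save_file(s0_full, "s0 img")
    if verbose:
        io_generator.save_file(utils.millisec2sec(t2s_limited), "limited t2star img")
        io_generator.save_file(s0_limited, "limited s0 img")
    # The real helper presumably also computes the RMSE of the fit (omitted here)
    return DecayMaps(t2s_limited, s0_limited, t2s_full, s0_full)

-        # Perform ICA, calculate 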
metrics, and apply decision tree - # Restart when ICA fails to converge or too few BOLD components found + # ICA with restart logic keep_restarting = True n_restarts = 0 seed = fixed_seed @@ -881,27 +817,26 @@ def tedana_workflow( seed += 1 n_restarts = seed - fixed_seed - # Estimate betas and compute selection metrics for mixing matrix - # generated from dimensionally reduced data using full data (i.e., data - # with thermal noise) + # Compute component metrics necessary_metrics = selector.necessary_metrics - # The figures require some metrics that might not be used by the decision tree. extra_metrics = ["variance explained", "normalized variance explained", "kappa", "rho"] necessary_metrics = sorted(list(set(necessary_metrics + extra_metrics))) component_table, mixing = metrics.collect.generate_metrics( - data_cat=data_cat, + data_cat=me_data.data_cat, data_optcom=data_optcom, mixing=mixing, - adaptive_mask=masksum_clf, + adaptive_mask=masks.masksum_clf, tes=tes, n_independent_echos=n_independent_echos, io_generator=io_generator, label="ICA", metrics=necessary_metrics, - external_regressors=external_regressors, + external_regressors=external_regressors_data, external_regressor_config=selector.tree["external_regressor_config"], ) + + # Perform component selection LGR.info("Selecting components from ICA results") selector = selection.automatic_selection( component_table, @@ -910,9 +845,9 @@ def tedana_workflow( n_vols=n_vols, n_independent_echos=n_independent_echos, ) - n_likely_bold_comps = selector.n_likely_bold_comps_ + n_likely_bold = selector.n_likely_bold_comps_ - if n_likely_bold_comps == 0: + if n_likely_bold == 0: if ica_method.lower() == "robustica": LGR.warning("No BOLD components found with robustICA mixing matrix.") keep_restarting = False @@ -923,44 +858,53 @@ def tedana_workflow( keep_restarting = False else: LGR.warning("No BOLD components found. 
Re-attempting ICA.") - # If we're going to restart, temporarily allow force overwrite io_generator.overwrite = True - # Create a re-initialized selector object if rerunning - # Since external_regressor_config might have been expanded to remove - # regular expressions immediately after initialization, - # store and copy this key - tmp_external_regressor_config = selector.tree["external_regressor_config"] + # Re-initialize selector + tmp_ext_config = selector.tree["external_regressor_config"] selector = ComponentSelector(tree) - selector.tree["external_regressor_config"] = tmp_external_regressor_config - RepLGR.disabled = True # Disable the report to avoid duplicate text + selector.tree["external_regressor_config"] = tmp_ext_config + RepLGR.disabled = True else: keep_restarting = False - RepLGR.disabled = False # Re-enable the report after the while loop is escaped - io_generator.overwrite = overwrite # Re-enable original overwrite behavior + RepLGR.disabled = False + io_generator.overwrite = overwrite + + # Store robustica metrics + if ica_method.lower() == "robustica": + if selector.cross_component_metrics_ is None: + selector.cross_component_metrics_ = {} + selector.cross_component_metrics_["fastica_convergence_warning_count"] = ( + fastica_convergence_warning_count + ) + selector.cross_component_metrics_["robustica_mean_index_quality"] = index_quality + else: + # Use supplied mixing matrix LGR.info("Using supplied mixing matrix from ICA") mixing = pd.read_table(mixing_file).values - # selector = ComponentSelector(tree) + # Compute metrics necessary_metrics = selector.necessary_metrics - # The figures require some metrics that might not be used by the decision tree. extra_metrics = ["variance explained", "normalized variance explained", "kappa", "rho"] necessary_metrics = sorted(list(set(necessary_metrics + extra_metrics))) component_table, mixing = metrics.collect.generate_metrics( - data_cat=data_cat, + data_cat=me_data.data_cat, data_optcom=data_optcom, mixing=mixing, - adaptive_mask=masksum_clf, + adaptive_mask=masks.masksum_clf, tes=tes, n_independent_echos=n_independent_echos, io_generator=io_generator, label="ICA", metrics=necessary_metrics, - external_regressors=external_regressors, + external_regressors=external_regressors_data, external_regressor_config=selector.tree["external_regressor_config"], ) + + # Perform component selection + LGR.info("Selecting components from ICA results") selector = selection.automatic_selection( component_table, selector, @@ -972,33 +916,31 @@ def tedana_workflow( if selector.n_likely_bold_comps_ == 0: LGR.warning("No BOLD components found with user-provided ICA mixing matrix.") - if ica_method.lower() == "robustica": - # If robustica was used, store number of iterations where ICA failed - selector.cross_component_metrics_["fastica_convergence_warning_count"] = ( - fastica_convergence_warning_count - ) - selector.cross_component_metrics_["robustica_mean_index_quality"] = index_quality + # =========================================================================== + # Stage 7: Output Generation + # =========================================================================== + component_table = selector.component_table_ - # TODO The ICA mixing matrix should be written out after it is created - # It is currently being written after component selection is done - # and rewritten if an existing mixing matrix is given as an input + # Save mixing matrix comp_names = component_table["Component"].values mixing_df = pd.DataFrame(data=mixing, columns=comp_names) 
     io_generator.save_file(mixing_df, "ICA mixing tsv")
 
-    betas_oc = utils.unmask(computefeats2(data_optcom, mixing, mask_denoise), mask_denoise)
+    # Save z-scored component maps
+    betas_oc = utils.unmask(
+        computefeats2(data_optcom, mixing, masks.mask_denoise), masks.mask_denoise
+    )
     io_generator.save_file(betas_oc, "z-scored ICA components img")
 
-    # calculate the fit of rejected to accepted components to use as a quality measure
-    # Note: This adds a column to component_table & needs to run before the table is saved
+    # Calculate the fit of rejected to accepted components as a quality measure.
+    # Note: this adds a column to component_table and must run before the table is saved.
     reporting.quality_metrics.calculate_rejected_components_impact(selector, mixing)
 
-    # Save component selector and tree
+    # Save selector and metrics
     selector.to_files(io_generator)
-
-    # Save metrics and metadata
     metric_metadata = metrics.collect.get_metadata(component_table)
     io_generator.save_file(metric_metadata, "ICA metrics json")
 
+    # Save decomposition metadata
     decomp_metadata = {
         "Method": (
             "Independent components analysis with FastICA algorithm implemented by sklearn. "
@@ -1014,181 +956,89 @@ def tedana_workflow(
     if selector.n_likely_bold_comps_ == 0:
         LGR.warning("No BOLD components detected! Please check data and results!")
 
-    # TODO: un-hack separate component_table
-    component_table = selector.component_table_
-
+    # Apply tedort if requested
     mixing_orig = mixing.copy()
     if tedort:
-        comps_accepted = selector.accepted_comps_
-        comps_rejected = selector.rejected_comps_
-        acc_ts = mixing[:, comps_accepted]
-        rej_ts = mixing[:, comps_rejected]
-        betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
-        pred_rej_ts = np.dot(acc_ts, betas)
-        resid = rej_ts - pred_rej_ts
-        mixing[:, comps_rejected] = resid
+        mixing = apply_tedort(mixing, selector.accepted_comps_, selector.rejected_comps_)
 
     comp_names = [
         io.add_decomp_prefix(comp, prefix="ICA", max_value=component_table.index.max())
         for comp in range(selector.n_comps_)
     ]
-
     mixing_df = pd.DataFrame(data=mixing, columns=comp_names)
     io_generator.save_file(mixing_df, "ICA orthogonalized mixing tsv")
-    RepLGR.info(
-        "Rejected components' time series were then "
-        "orthogonalized with respect to accepted components' time "
-        "series."
-    )
 
-    io.writeresults(
+    # Write denoised results
+    write_denoised_results(
         data_optcom,
-        mask=mask_denoise,
+        mask=masks.mask_denoise,
         component_table=component_table,
         mixing=mixing,
         io_generator=io_generator,
     )
 
+    # Apply MIR if requested
     if "mir" in gscontrol:
-        gsc.minimum_image_regression(
+        apply_mir(
             data_optcom=data_optcom,
             mixing=mixing,
-            mask=mask_denoise,
+            mask=masks.mask_denoise,
             component_table=component_table,
             classification_tags=selector.classification_tags,
             io_generator=io_generator,
         )
 
+    # Write per-echo results if verbose
     if verbose:
-        io.writeresults_echoes(data_cat, mixing, mask_denoise, component_table, io_generator)
+        write_echo_results(
+            me_data.data_cat, mixing, masks.mask_denoise, component_table, io_generator
+        )
 
-    # Write out registry of outputs
+    # Save registry and metadata
     io_generator.save_self()
-
-    # Write out BIDS-compatible description file
-    derivative_metadata = {
-        "Name": "tedana Outputs",
-        "BIDSVersion": "1.5.0",
-        "DatasetType": "derivative",
-        "GeneratedBy": [
-            {
-                "Name": "tedana",
-                "Version": __version__,
-                "Description": (
-                    "A denoising pipeline for the identification and removal "
-                    "of non-BOLD noise from multi-echo fMRI data." 
- ), - "CodeURL": "https://github.com/ME-ICA/tedana", - "Node": { - "Name": info_dict["Node"], - "System": info_dict["System"], - "Machine": info_dict["Machine"], - "Processor": info_dict["Processor"], - "Release": info_dict["Release"], - "Version": info_dict["Version"], - }, - "Python": info_dict["Python"], - "Python_Libraries": info_dict["Python_Libraries"], - "Command": info_dict["Command"], - } - ], - } - with open(io_generator.get_name("data description json"), "w") as fo: - json.dump(derivative_metadata, fo, sort_keys=True, indent=4) - - RepLGR.info( - "\n\nThis workflow used numpy \\citep{van2011numpy}, scipy \\citep{virtanen2020scipy}, " - "pandas \\citep{mckinney2010data,reback2020pandas}, " - "scikit-learn \\citep{pedregosa2011scikit}, " - "nilearn, bokeh \\citep{bokehmanual}, matplotlib \\citep{Hunter2007}, " - "and nibabel \\citep{brett_matthew_2019_3233118}." - ) - - RepLGR.info( - "This workflow also used the Dice similarity index " - "\\citep{dice1945measures,sorensen1948method}." + save_derivative_metadata( + io_generator, + info_dict, + workflow_name="tedana", + workflow_description=( + "A denoising pipeline for the identification and removal " + "of non-BOLD noise from multi-echo fMRI data." + ), ) - with open(repname) as fo: - report = [line.rstrip() for line in fo.readlines()] - report = " ".join(report) - # Double-spaces reflect new paragraphs - report = report.replace(" ", "\n\n") - - with open(repname, "w") as fo: - fo.write(report) - - # Collect BibTeX entries for cited papers - references = get_description_references(report) - - with open(bibtex_file, "w") as fo: - fo.write(references) + # Finalize report text + finalize_report_text(repname, bibtex_file) + # =========================================================================== + # Stage 8: Report Generation + # =========================================================================== if not no_reports: - LGR.info("Making figures folder with static component maps and timecourse plots.") - - data_denoised, data_accepted, data_rejected = io.denoise_ts( - data_optcom, - mixing, - mask_denoise, - component_table, - ) - - reporting.static_figures.plot_adaptive_mask( - optcom=data_optcom, - base_mask=mask, - io_generator=io_generator, - ) - reporting.static_figures.carpet_plot( - optcom_ts=data_optcom, - denoised_ts=data_denoised, - hikts=data_accepted, - lowkts=data_rejected, - mask=mask_denoise, - io_generator=io_generator, - gscontrol=gscontrol, - ) - reporting.static_figures.comp_figures( - data_optcom, - mask=mask_denoise, + generate_static_figures( + data_optcom=data_optcom, + mask_denoise=masks.mask_denoise, + base_mask=masks.base_mask, component_table=component_table, mixing=mixing_orig, io_generator=io_generator, png_cmap=png_cmap, + gscontrol=gscontrol, + masksum_denoise=masks.masksum_denoise, + external_regressors=external_regressors_data, + t2smap_provided=t2smap_provided, ) - reporting.static_figures.plot_t2star_and_s0(io_generator=io_generator, mask=mask_denoise) - if t2smap is None: - reporting.static_figures.plot_rmse( - io_generator=io_generator, - adaptive_mask=masksum_denoise, - ) - - if gscontrol: - reporting.static_figures.plot_gscontrol( - io_generator=io_generator, - gscontrol=gscontrol, - ) - - if external_regressors is not None: - reporting.static_figures.plot_heatmap( - mixing=mixing_df, - external_regressors=external_regressors, - component_table=component_table, - out_file=os.path.join( - io_generator.out_dir, - "figures", - f"{io_generator.prefix}confound_correlations.svg", - ), - ) - 
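To close the loop on the shared helpers, here are sketches of apply_tedort, finalize_report_text, and teardown_workflow, reconstructed from the inline code they replace: the orthogonalization block and the report/BibTeX block above, and the two cleanup calls in the next hunk. The defensive copy in apply_tedort is an assumption; the removed block mutated the mixing matrix in place.

import numpy as np

from tedana import utils
from tedana.bibtex import get_description_references


def apply_tedort(mixing, comps_accepted, comps_rejected):
    """Orthogonalize rejected component time series w.r.t. accepted ones."""
    acc_ts = mixing[:, comps_accepted]
    rej_ts = mixing[:, comps_rejected]
    # Fit the rejected time series on the accepted ones and keep only the
    # residuals, so removing rejected components spares accepted variance
    betas = np.linalg.lstsq(acc_ts, rej_ts, rcond=None)[0]
    mixing = mixing.copy()
    mixing[:, comps_rejected] = rej_ts - np.dot(acc_ts, betas)
    return mixing


def finalize_report_text(repname, bibtex_file):
    """Reflow the report into paragraphs and collect its BibTeX references."""
    with open(repname) as fo:
        report = " ".join(line.rstrip() for line in fo)
    # Double-spaces in the assembled text mark paragraph breaks
    report = report.replace("  ", "\n\n")
    with open(repname, "w") as fo:
        fo.write(report)
    with open(bibtex_file, "w") as fo:
        fo.write(get_description_references(report))


def teardown_workflow():
    """Add newsletter info to the log, then release the logger handlers."""
    utils.log_newsletter_info()
    utils.teardown_loggers()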
- LGR.info("Generating dynamic report") - reporting.generate_report(io_generator, cluster_labels, similarity_t_sne) + generate_dynamic_report(io_generator, cluster_labels, similarity_t_sne) + # =========================================================================== + # Stage 9: Cleanup + # =========================================================================== LGR.info("Workflow completed") + teardown_workflow() - # Add newsletter info to the log - utils.log_newsletter_info() - - utils.teardown_loggers() + return { + "component_table": component_table, + "selector": selector, + "mixing": mixing, + "n_accepted": selector.n_accepted_comps_, + } def _main(argv=None):