biomarkerPanels/man/optimize_panel.Rd at 755d80a1721b35fddf0ed8734219507dd0a49b4b · SydneyBioX/biomarkerPanels · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/optimization.R
\name{optimize_panel}
\alias{optimize_panel}
\title{Optimize Biomarker Panels with NSGA-II}
\usage{
optimize_panel(
  x,
  y,
  objectives = define_objectives(losses = c("sensitivity", "specificity",
    "num_features")),
  max_features = 5L,
  feature_pool = NULL,
  cohort_aggregator = c("pairwise_ratios", "none"),
  constraints = list(),
  scoring_fn = NULL,
  nsga_control = list(),
  assay = NULL
)
}
\arguments{
\item{x}{Matrix-like object, \code{SummarizedExperiment}, or list of matrices /
experiments representing one or more cohorts.}

\item{y}{Binary response (\code{factor}, \code{character}, or \code{logical}) aligned with
\code{x}. When \code{x} is a list, \code{y} must be a list of the same length.}

\item{objectives}{Named list of objective descriptors as returned by
\code{\link[=define_objectives]{define_objectives()}}.}

\item{max_features}{Maximum number of biomarkers permitted in a panel.}

\item{feature_pool}{Optional subset of feature identifiers (names or integer
indices) considered during optimization. When a cohort aggregator is used,
specify the underlying (pre-aggregation) feature names; aggregated labels
such as \code{"A--B"} are also accepted and will be mapped back to their
constituents. Defaults to all features.}

\item{cohort_aggregator}{Transformation applied to cohort feature matrices
prior to alignment. Defaults to \code{"pairwise_ratios"}, which generates
pairwise within-cohort contrasts via \code{\link[=pairwise_col_diff]{pairwise_col_diff()}} to dampen
distributional shifts across sites. Future work: support additional
harmonisation strategies (e.g., empirical Bayes, domain adversarial
mappings).}

\item{constraints}{Optional list of constraint descriptors (e.g.,
from \code{\link[=min_metric_constraint]{min_metric_constraint()}}) that must evaluate to \code{TRUE} for a candidate
solution to be considered feasible.}

\item{scoring_fn}{Function producing per-sample scores from the selected
features. Signature:
\verb{function(x_selected, selected_features, truth, cohort = NULL, ...)}.}

\item{nsga_control}{Named list of arguments passed to \code{\link[mco:nsga2]{mco::nsga2()}}. Defaults
to \code{list(popsize = 64, generations = 60, cprob = 0.7, cdist = 5, mprob = 0.2, mdist = 10)}.}

\item{assay}{For \code{SummarizedExperiment} inputs, assay name or index to use.}
}
\value{
A \code{BiomarkerPanelResult} with Pareto-optimal solutions summarised in
the \code{objectives} slot.
}
\description{
Wrapper around \code{\link[mco:nsga2]{mco::nsga2()}} that composes registered loss functions into a
multi-objective search for compact biomarker panels. Candidate solutions are
represented as weights over a feature pool; the top \code{max_features} features
define a panel whose performance is evaluated via the selected losses.
Inputs may be a single cohort (\code{matrix}, \code{data.frame}, or
\code{SummarizedExperiment}) or multiple cohorts supplied as lists of such objects.
Feature alignment across cohorts currently relies on the simple intersection
of shared column names; future releases will add more flexible harmonisation.
Optimisation should be run on training data only; use \code{\link[=evaluate_panel]{evaluate_panel()}} for
held-out validation.
}