Production2 #13 (Draft)

Wants to merge 14 commits into base: main.
10 changes: 6 additions & 4 deletions src/calicost/arg_parse.py
@@ -40,8 +40,9 @@ def load_default_config():
"min_spots_per_clone" : 100,
"min_avgumi_per_clone" : 10,
"maxspots_pooling" : 7,
"tumorprop_threshold" : 0.5,
"max_iter_outer" : 20,
"tumorprop_threshold" : 0.5,
"max_iter_outer_initial" : 20,
"max_iter_outer" : 10,
"nodepotential" : "weighted_sum", # max or weighted_sum
"initialization_method" : "rectangle", # rectangle or datadrive
"num_hmrf_initialization_start" : 0,
@@ -100,7 +101,8 @@ def load_default_config():
"min_spots_per_clone" : "int",
"min_avgumi_per_clone" : "int",
"maxspots_pooling" : "int",
"tumorprop_threshold" : "float",
"tumorprop_threshold" : "float",
"max_iter_outer_initial" : "int",
"max_iter_outer" : "int",
"nodepotential" : "str",
"initialization_method" : "str",
@@ -132,7 +134,7 @@ def load_default_config():
category_elements = [["input_filelist", "spaceranger_dir", "snp_dir", "output_dir"], \
["geneticmap_file", "hgtable_file", "normalidx_file", "tumorprop_file", "alignment_files", "supervision_clone_file", "filtergenelist_file", "filterregion_file", "secondary_min_umi", "min_snpumi_perspot", "min_percent_expressed_spots", "bafonly"], \
["nu", "logphase_shift", "npart_phasing"], \
["n_clones", "n_clones_rdr", "min_spots_per_clone", "min_avgumi_per_clone", "maxspots_pooling", "tumorprop_threshold", "max_iter_outer", "nodepotential", "initialization_method", "num_hmrf_initialization_start", "num_hmrf_initialization_end", "spatial_weight", "construct_adjacency_method", "construct_adjacency_w"], \
["n_clones", "n_clones_rdr", "min_spots_per_clone", "min_avgumi_per_clone", "maxspots_pooling", "tumorprop_threshold", "max_iter_outer_initial", "max_iter_outer", "nodepotential", "initialization_method", "num_hmrf_initialization_start", "num_hmrf_initialization_end", "spatial_weight", "construct_adjacency_method", "construct_adjacency_w"], \
["n_states", "params", "t", "t_phaseing", "fix_NB_dispersion", "shared_NB_dispersion", "fix_BB_dispersion", "shared_BB_dispersion", "max_iter", "tol", "gmm_random_state", "np_threshold", "np_eventminlen"], \
["nonbalance_bafdist", "nondiploid_rdrdist"]]
return config_shared, config_joint, config_single, argtype_shared, argtype_joint, argtype_single, category_names, category_elements
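For readers skimming the config change: the new max_iter_outer_initial key slots into the same defaults-plus-type-map pattern as the existing options, with max_iter_outer_initial governing the first HMRF pass (default 20) and max_iter_outer the later refinement (default 10). A minimal, self-contained sketch of that pattern follows; it is illustrative only and is not CalicoST's actual read_configuration_file.

# Illustrative sketch only: mirrors the defaults/argtype pattern above.
defaults = {"tumorprop_threshold": 0.5, "max_iter_outer_initial": 20, "max_iter_outer": 10}
argtypes = {"tumorprop_threshold": "float", "max_iter_outer_initial": "int", "max_iter_outer": "int"}
casts = {"int": int, "float": float, "str": str}

def apply_user_setting(config, key, raw_value):
    # cast the raw string read from a configuration file to its declared type
    config[key] = casts[argtypes[key]](raw_value)

config = dict(defaults)
apply_user_setting(config, "max_iter_outer_initial", "30")   # hypothetical user override
assert config["max_iter_outer_initial"] == 30 and config["max_iter_outer"] == 10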
13 changes: 7 additions & 6 deletions src/calicost/calicost_main.py
@@ -24,8 +24,9 @@
from calicost.find_integer_copynumber import *
from calicost.parse_input import *
from calicost.utils_plotting import *
from calicost.utils_profiling import profile


@profile
def main(configuration_file):
try:
config = read_configuration_file(configuration_file)
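The hunk above imports a new profile decorator from calicost.utils_profiling, a module whose contents are not shown in this diff. For orientation only, a minimal cProfile-based decorator with that interface might look like the sketch below; this is an assumption, not the actual implementation.

# Hypothetical sketch of calicost/utils_profiling.py; the real module is not part of these hunks.
import cProfile
import functools
import pstats

def profile(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        profiler = cProfile.Profile()
        profiler.enable()
        try:
            return func(*args, **kwargs)
        finally:
            profiler.disable()
            # report the 20 most expensive calls by cumulative time
            pstats.Stats(profiler).sort_stats("cumulative").print_stats(20)
    return wrapper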
@@ -66,14 +67,14 @@ def main(configuration_file):
# run HMRF + HMM
if config["tumorprop_file"] is None:
hmrf_concatenate_pipeline(outdir, prefix, single_X, lengths, single_base_nb_mean, single_total_bb_RD, initial_clone_index, n_states=config["n_states"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat, adjacency_mat=adjacency_mat, sample_ids=sample_ids, max_iter_outer=config["max_iter_outer"], nodepotential=config["nodepotential"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat, adjacency_mat=adjacency_mat, sample_ids=sample_ids, max_iter_outer=config["max_iter_outer_initial"], nodepotential=config["nodepotential"], \
hmmclass=hmm_nophasing_v2, params="sp", t=config["t"], random_state=config["gmm_random_state"], \
fix_NB_dispersion=config["fix_NB_dispersion"], shared_NB_dispersion=config["shared_NB_dispersion"], \
fix_BB_dispersion=config["fix_BB_dispersion"], shared_BB_dispersion=config["shared_BB_dispersion"], \
is_diag=True, max_iter=config["max_iter"], tol=config["tol"], spatial_weight=config["spatial_weight"])
else:
hmrfmix_concatenate_pipeline(outdir, prefix, single_X, lengths, single_base_nb_mean, single_total_bb_RD, single_tumor_prop, initial_clone_index, n_states=config["n_states"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat, adjacency_mat=adjacency_mat, sample_ids=sample_ids, max_iter_outer=config["max_iter_outer"], nodepotential=config["nodepotential"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat, adjacency_mat=adjacency_mat, sample_ids=sample_ids, max_iter_outer=config["max_iter_outer_initial"], nodepotential=config["nodepotential"], \
hmmclass=hmm_nophasing_v2, params="sp", t=config["t"], random_state=config["gmm_random_state"], \
fix_NB_dispersion=config["fix_NB_dispersion"], shared_NB_dispersion=config["shared_NB_dispersion"], \
fix_BB_dispersion=config["fix_BB_dispersion"], shared_BB_dispersion=config["shared_BB_dispersion"], \
@@ -181,14 +182,14 @@ def main(configuration_file):
copy_slice_sample_ids = copy.copy(sample_ids[idx_spots])
if config["tumorprop_file"] is None:
hmrf_concatenate_pipeline(outdir, prefix, single_X[:,:,idx_spots], lengths, single_base_nb_mean[:,idx_spots], single_total_bb_RD[:,idx_spots], initial_clone_index, n_states=config["n_states"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat[np.ix_(idx_spots,idx_spots)], adjacency_mat=adjacency_mat[np.ix_(idx_spots,idx_spots)], sample_ids=copy_slice_sample_ids, max_iter_outer=10, nodepotential=config["nodepotential"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat[np.ix_(idx_spots,idx_spots)], adjacency_mat=adjacency_mat[np.ix_(idx_spots,idx_spots)], sample_ids=copy_slice_sample_ids, max_iter_outer=config["max_iter_outer"], nodepotential=config["nodepotential"], \
hmmclass=hmm_nophasing_v2, params="smp", t=config["t"], random_state=config["gmm_random_state"], \
fix_NB_dispersion=config["fix_NB_dispersion"], shared_NB_dispersion=config["shared_NB_dispersion"], \
fix_BB_dispersion=config["fix_BB_dispersion"], shared_BB_dispersion=config["shared_BB_dispersion"], \
is_diag=True, max_iter=config["max_iter"], tol=config["tol"], spatial_weight=config["spatial_weight"])
else:
hmrfmix_concatenate_pipeline(outdir, prefix, single_X[:,:,idx_spots], lengths, single_base_nb_mean[:,idx_spots], single_total_bb_RD[:,idx_spots], single_tumor_prop[idx_spots], initial_clone_index, n_states=config["n_states"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat[np.ix_(idx_spots,idx_spots)], adjacency_mat=adjacency_mat[np.ix_(idx_spots,idx_spots)], sample_ids=copy_slice_sample_ids, max_iter_outer=10, nodepotential=config["nodepotential"], \
log_sitewise_transmat=log_sitewise_transmat, smooth_mat=smooth_mat[np.ix_(idx_spots,idx_spots)], adjacency_mat=adjacency_mat[np.ix_(idx_spots,idx_spots)], sample_ids=copy_slice_sample_ids, max_iter_outer=config["max_iter_outer"], nodepotential=config["nodepotential"], \
hmmclass=hmm_nophasing_v2, params="smp", t=config["t"], random_state=config["gmm_random_state"], \
fix_NB_dispersion=config["fix_NB_dispersion"], shared_NB_dispersion=config["shared_NB_dispersion"], \
fix_BB_dispersion=config["fix_BB_dispersion"], shared_BB_dispersion=config["shared_BB_dispersion"], \
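Taken together with the earlier hunk, the outer HMRF loop now has two separate budgets: max_iter_outer_initial for the first pass over all spots and max_iter_outer for the per-slice refinement, which previously used a hard-coded 10. A small sketch of that split (the helper name is hypothetical; config is the dict-like object returned by read_configuration_file):

# Hypothetical helper illustrating the two-tier iteration budget introduced in this PR.
def outer_iteration_budget(config, stage):
    # "initial": first HMRF/HMM pass over all spots (default 20 iterations)
    # "refine":  per-slice re-estimation (default 10 iterations, formerly hard-coded)
    key = "max_iter_outer_initial" if stage == "initial" else "max_iter_outer"
    return config[key]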
@@ -441,4 +442,4 @@ def main(configuration_file):
parser.add_argument("-c", "--configfile", help="configuration file of CalicoST", required=True, type=str)
args = parser.parse_args()

main(args.configfile)
main(args.configfile)
4 changes: 2 additions & 2 deletions src/calicost/calicost_supervised.py
@@ -32,7 +32,7 @@
plt.rcParams.update({'font.size': 14})

import mkl
mkl.set_num_threads(1)
mkl.set_num_threads(4)
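The thread-count bump above assumes the mkl-service package is importable. A more defensive variant (a sketch, not part of this PR) would degrade gracefully on builds without MKL:

# Sketch only: guard the optional mkl-service dependency instead of importing it unconditionally.
try:
    import mkl
    mkl.set_num_threads(4)
except ImportError:
    pass  # non-MKL NumPy/SciPy builds keep their default threading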


def main(configuration_file):
@@ -490,4 +490,4 @@ def main(configuration_file):

if __name__ == "__main__":
if len(sys.argv) > 1:
main(sys.argv[1])
main(sys.argv[1])
13 changes: 8 additions & 5 deletions src/calicost/hmm_NB_BB_nophasing_v2.py
@@ -12,6 +12,7 @@
import copy
from calicost.utils_distribution_fitting import *
from calicost.utils_hmm import *
from calicost.utils_profiling import profile
import networkx as nx

"""
@@ -35,8 +36,9 @@ def __init__(self, params="stmp", t=1-1e-4):
"""
self.params = params
self.t = t
#

@staticmethod
@profile
def compute_emission_probability_nb_betabinom(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus):
"""
Attributes
@@ -88,8 +90,9 @@ def compute_emission_probability_nb_betabinom(X, base_nb_mean, log_mu, alphas, t
if len(idx_nonzero_baf) > 0:
log_emission_baf[i, idx_nonzero_baf, s] = scipy.stats.betabinom.logpmf(X[idx_nonzero_baf,1,s], total_bb_RD[idx_nonzero_baf,s], p_binom[i, s] * taus[i, s], (1-p_binom[i, s]) * taus[i, s])
return log_emission_rdr, log_emission_baf
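The BAF emission above parameterizes scipy.stats.betabinom by a state-specific mean p_binom and an inverse-dispersion tau, i.e. shape parameters a = p_binom * tau and b = (1 - p_binom) * tau. A self-contained toy example of that call (made-up counts, not CalicoST data):

import numpy as np
import scipy.stats

b_counts = np.array([12, 30, 5, 48])     # B-allele UMIs per bin (toy values)
totals   = np.array([40, 60, 20, 100])   # total SNP-covering UMIs per bin

p_binom = 0.3    # state-specific mean BAF
tau     = 25.0   # inverse-dispersion: larger tau behaves more like a plain binomial

log_emission_baf = scipy.stats.betabinom.logpmf(
    b_counts, totals, p_binom * tau, (1.0 - p_binom) * tau
)
print(log_emission_baf)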
#

@staticmethod
@profile
def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus, tumor_prop, **kwargs):
"""
Attributes
@@ -153,7 +156,7 @@ def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alpha
mix_p_B = (1 - p_binom[i, s]) * this_weighted_tp[idx_nonzero_baf] + 0.5 * (1 - this_weighted_tp[idx_nonzero_baf])
log_emission_baf[i, idx_nonzero_baf, s] += scipy.stats.betabinom.logpmf(X[idx_nonzero_baf,1,s], total_bb_RD[idx_nonzero_baf,s], mix_p_A * taus[i, s], mix_p_B * taus[i, s])
return log_emission_rdr, log_emission_baf
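In the tumor-normal mixture version above, each bin's expected allele probabilities are blended between the clone-specific p_binom (tumor fraction) and 0.5 (normal fraction) according to the weighted tumor proportion, and the blended values again scale tau into beta-binomial shape parameters. A toy illustration with made-up numbers:

import numpy as np
import scipy.stats

p_binom = 0.2                              # tumor-specific BAF for one state
tau = 25.0                                 # inverse-dispersion
tumor_prop = np.array([0.9, 0.5, 0.1])     # per-bin weighted tumor proportion (toy)

# blend the tumor BAF with the normal 0.5 according to tumor proportion
mix_p_A = p_binom * tumor_prop + 0.5 * (1.0 - tumor_prop)
mix_p_B = (1.0 - p_binom) * tumor_prop + 0.5 * (1.0 - tumor_prop)

b_counts = np.array([10, 20, 28])
totals   = np.array([60, 60, 60])
log_emission_baf = scipy.stats.betabinom.logpmf(b_counts, totals, mix_p_A * tau, mix_p_B * tau)
print(log_emission_baf)   # mix_p_A + mix_p_B == 1, so the two shapes still sum to tau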
#

@staticmethod
@njit
def forward_lattice(lengths, log_transmat, log_startprob, log_emission, log_sitewise_transmat):
@@ -187,7 +190,7 @@ def forward_lattice(lengths, log_transmat, log_startprob, log_emission, log_site
log_alpha[j, (cumlen + t)] = mylogsumexp(buf) + np.sum(log_emission[j, (cumlen + t), :])
cumlen += le
return log_alpha
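For orientation, forward_lattice fills log_alpha with the standard log-space forward recursion per genome segment; the real code also folds in the sitewise phase-switch transition and sums emissions over pooled spots, which are omitted here. A compact NumPy-only sketch of the core recursion on toy dimensions:

import numpy as np
from scipy.special import logsumexp

n_states, n_obs = 3, 5
rng = np.random.default_rng(0)
log_emission  = np.log(rng.dirichlet(np.ones(n_states), size=n_obs).T)   # (n_states, n_obs)
log_startprob = np.log(np.full(n_states, 1.0 / n_states))
log_transmat  = np.log(np.full((n_states, n_states), 0.1) + 0.7 * np.eye(n_states))

log_alpha = np.zeros((n_states, n_obs))
log_alpha[:, 0] = log_startprob + log_emission[:, 0]
for t in range(1, n_obs):
    for j in range(n_states):
        # sum over predecessor states in log space, then add the emission term
        log_alpha[j, t] = logsumexp(log_alpha[:, t - 1] + log_transmat[:, j]) + log_emission[j, t]

print(logsumexp(log_alpha[:, -1]))   # log-likelihood of the toy segment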
#

@staticmethod
@njit
def backward_lattice(lengths, log_transmat, log_startprob, log_emission, log_sitewise_transmat):
@@ -223,7 +226,7 @@ def backward_lattice(lengths, log_transmat, log_startprob, log_emission, log_sit
cumlen += le
return log_beta

#

def run_baum_welch_nb_bb(self, X, lengths, n_states, base_nb_mean, total_bb_RD, log_sitewise_transmat=None, tumor_prop=None, \
fix_NB_dispersion=False, shared_NB_dispersion=False, fix_BB_dispersion=False, shared_BB_dispersion=False, \
is_diag=False, init_log_mu=None, init_p_binom=None, init_alphas=None, init_taus=None, max_iter=100, tol=1e-4, **kwargs):
4 changes: 2 additions & 2 deletions src/calicost/hmm_NB_BB_phaseswitch.py
@@ -716,7 +716,7 @@ def similarity_components_rdrbaf_neymanpearson(X, base_nb_mean, total_bb_RD, res
new_assignment = np.array([map_clone_id[x] for x in res["new_assignment"]])
merged_res = copy.copy(res)
merged_res["new_assignment"] = new_assignment
merged_res["total_llf"] = np.NAN
merged_res["total_llf"] = np.nan
merged_res["pred_cnv"] = np.concatenate([ res["pred_cnv"][(c[0]*n_obs):(c[0]*n_obs+n_obs)] for c in merging_groups ])
merged_res["log_gamma"] = np.hstack([ res["log_gamma"][:, (c[0]*n_obs):(c[0]*n_obs+n_obs)] for c in merging_groups ])
return merging_groups, merged_res
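The np.NAN to np.nan swap here (and in the commented-out block further down) is a compatibility fix: the upper-case aliases were removed in NumPy 2.0, while np.nan works on both 1.x and 2.x. For example:

import numpy as np

merged_total_llf = np.nan    # portable across NumPy 1.x and 2.x
# np.NAN and np.NaN were removed in NumPy 2.0 and would raise AttributeError there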
@@ -864,7 +864,7 @@ def combine_similar_states_across_clones(X, base_nb_mean, total_bb_RD, res, para
# new_assignment = np.array([map_clone_id[x] for x in res["new_assignment"]])
# merged_res = copy.copy(res)
# merged_res["new_assignment"] = new_assignment
# merged_res["total_llf"] = np.NAN
# merged_res["total_llf"] = np.nan
# merged_res["pred_cnv"] = np.concatenate([ res["pred_cnv"][(c[0]*n_obs):(c[0]*n_obs+n_obs)] for c in merging_groups ])
# merged_res["log_gamma"] = np.hstack([ res["log_gamma"][:, (c[0]*n_obs):(c[0]*n_obs+n_obs)] for c in merging_groups ])
# return merging_groups, merged_res