Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rust #11

Draft
wants to merge 38 commits into
base: main
Choose a base branch
from
Draft

Rust #11

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
fb5c9cd
init changes
michaelJwilson Jul 10, 2024
44b5689
tidy
michaelJwilson Jul 10, 2024
0bb6a5c
fix
michaelJwilson Jul 10, 2024
f6b7cd0
extract emission from hmrfmix_reassignment_posterior_concatenate_v1
michaelJwilson Jul 10, 2024
4b9accf
fix
michaelJwilson Jul 10, 2024
e0695da
fix
michaelJwilson Jul 10, 2024
1d9b53c
fix
michaelJwilson Jul 10, 2024
1d62930
asserts on nclones for raw data test.
michaelJwilson Jul 10, 2024
d390816
fix raw
michaelJwilson Jul 10, 2024
a58fb90
fix
michaelJwilson Jul 10, 2024
b8f7347
test_hmrfmix_reassignment_posterior_concatenate_emission_v1 vetted an…
michaelJwilson Jul 10, 2024
16ed26f
fix
michaelJwilson Jul 10, 2024
ec037e2
fix
michaelJwilson Jul 10, 2024
bd19581
fix
michaelJwilson Jul 10, 2024
d819b5a
TODO exp returns an added dimension. Otherwise, looks good.
michaelJwilson Jul 10, 2024
d9ea196
fix
michaelJwilson Jul 10, 2024
d21cdd1
fix
michaelJwilson Jul 10, 2024
db23f27
50%, but runs.
michaelJwilson Jul 10, 2024
257bd0d
fix
michaelJwilson Jul 10, 2024
19b5f2f
profile
michaelJwilson Jul 10, 2024
e8d09d9
fix
michaelJwilson Jul 10, 2024
31d9cc3
fix
michaelJwilson Jul 10, 2024
397ded0
fix
michaelJwilson Jul 10, 2024
afd0c9f
calicostem unit tests.
michaelJwilson Jul 11, 2024
0c880b0
rust attempt
michaelJwilson Jul 11, 2024
30f1543
works with a speedup of
michaelJwilson Jul 11, 2024
58b3d6f
fix
michaelJwilson Jul 12, 2024
6fcada5
fix
michaelJwilson Jul 12, 2024
60b35cc
add benchmark group names
michaelJwilson Jul 12, 2024
7b8c4a6
running with 84% correct and 60% gain in speed.
michaelJwilson Jul 12, 2024
09e2080
tidy weighted bb.
michaelJwilson Jul 12, 2024
e968c1f
working weighted bb.
michaelJwilson Jul 12, 2024
330c0db
fix
michaelJwilson Jul 12, 2024
b22069b
fix
michaelJwilson Jul 12, 2024
29bddad
fix
michaelJwilson Jul 12, 2024
80c8b0d
fix
michaelJwilson Jul 12, 2024
3d50d50
fix
michaelJwilson Jul 12, 2024
17c9819
rm tar
michaelJwilson Sep 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 184 additions & 4 deletions src/calicost/hmm_NB_BB_nophasing_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import numpy as np
import line_profiler
from numba import njit
from scipy.stats import norm, multivariate_normal, poisson
import scipy.special
Expand All @@ -12,6 +13,8 @@
import copy
from calicost.utils_distribution_fitting import *
from calicost.utils_hmm import *
from calicost.utils_tumor import get_tumor_weight
from calicost.utils_thread_emission import thread_betabinom, thread_nbinom
import networkx as nx

"""
Expand All @@ -35,7 +38,7 @@ def __init__(self, params="stmp", t=1-1e-4):
"""
self.params = params
self.t = t
#

@staticmethod
def compute_emission_probability_nb_betabinom(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus):
"""
Expand Down Expand Up @@ -88,9 +91,42 @@ def compute_emission_probability_nb_betabinom(X, base_nb_mean, log_mu, alphas, t
if len(idx_nonzero_baf) > 0:
log_emission_baf[i, idx_nonzero_baf, s] = scipy.stats.betabinom.logpmf(X[idx_nonzero_baf,1,s], total_bb_RD[idx_nonzero_baf,s], p_binom[i, s] * taus[i, s], (1-p_binom[i, s]) * taus[i, s])
return log_emission_rdr, log_emission_baf
#

@staticmethod
def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus, tumor_prop, **kwargs):
def compute_emission_probability_nb_betabinom_v2(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus):
n_states = log_mu.shape[0]
(n_obs, n_comp, n_spots) = X.shape

# NB (n_states, n_obs, n_spots) == (7, 4_248, 1)
log_emission_rdr = np.zeros(shape=(n_states, n_obs, n_spots), dtype=float)

# NB nb_mean, nb_std: (segments, spots) * (states, spots) = (states, segments, spots) == (7, 4248, 1)
nb_mean = np.exp(log_mu)[:, None, :] * base_nb_mean[None, :, :]
nb_var = nb_mean + alphas[:, None, :] * nb_mean**2

kk = np.tile(X[:, 0, :], (n_states, 1, 1))
nn, pp = convert_params_var(nb_mean, nb_var)

idx = np.tile(base_nb_mean > 0., (n_states, 1, 1))
log_emission_rdr[idx] = scipy.stats.nbinom(kk[idx], nn[idx], pp[idx])

# NB BAF
log_emission_baf = np.zeros(shape=(n_states, n_obs, n_spots), dtype=float)

kk = np.tile(X[:, 1, :], (n_states, 1, 1))
nn = np.tile(total_bb_RD[:, :], (n_states, 1, 1))

# NB (states, spots)
aa = np.tile((p_binom * taus)[:, None, :], (1, n_obs, 1))
bb = np.tile(((1. - p_binom) * taus)[:, None, :], (1, n_obs, 1))

idx = np.tile(total_bb_RD > 0., (n_states, 1, 1))
log_emission_baf[idx] = scipy.stats.betabinom(kk[idx], nn[idx], aa[idx], bb[idx])

return log_emission_rdr, log_emission_baf

@staticmethod
def compute_emission_probability_nb_betabinom_mix_v1(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus, tumor_prop, **kwargs):
"""
Attributes
----------
Expand Down Expand Up @@ -124,9 +160,11 @@ def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alpha
n_comp = X.shape[1]
n_spots = X.shape[2]
n_states = log_mu.shape[0]

# initialize log_emission
log_emission_rdr = np.zeros((n_states, n_obs, n_spots))
log_emission_baf = np.zeros((n_states, n_obs, n_spots))

for i in np.arange(n_states):
for s in np.arange(n_spots):
# expression from NB distribution
Expand All @@ -153,7 +191,149 @@ def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alpha
mix_p_B = (1 - p_binom[i, s]) * this_weighted_tp[idx_nonzero_baf] + 0.5 * (1 - this_weighted_tp[idx_nonzero_baf])
log_emission_baf[i, idx_nonzero_baf, s] += scipy.stats.betabinom.logpmf(X[idx_nonzero_baf,1,s], total_bb_RD[idx_nonzero_baf,s], mix_p_A * taus[i, s], mix_p_B * taus[i, s])
return log_emission_rdr, log_emission_baf
#

@staticmethod
@line_profiler.profile
def compute_emission_probability_nb(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus, tumor_prop, **kwargs):
n_states = log_mu.shape[0]
n_obs, n_comp, n_spots = X.shape

# NB (n_states, n_obs, n_spots) == (7, 4248, 1)
log_emission_rdr = np.zeros(shape=(n_states, n_obs, n_spots), dtype=float)

assert base_nb_mean.shape == (n_obs, n_spots)
assert tumor_prop.shape == (n_obs, n_spots)
assert log_mu.shape == (n_states, n_spots)

# NB nb_mean, nb_std: (segments, spots) * (states, spots) = (states, segments, spots) == (7, 4248, 1)
nb_mean = base_nb_mean[None, :, :] * (tumor_prop[None, :, :] * np.exp(log_mu)[:, None, :] + 1. - tumor_prop[None, :, :])
nb_var = nb_mean + alphas[:, None, :] * nb_mean**2

kk = np.tile(X[:, 0, :], (n_states, 1, 1))
nn, pp = convert_params_var(nb_mean, nb_var)

idx = np.tile(base_nb_mean > 0., (n_states, 1, 1))
log_emission_rdr[idx] = scipy.stats.nbinom.logpmf(kk[idx], nn[idx], pp[idx])

return log_emission_rdr

@staticmethod
@line_profiler.profile
def compute_emission_probability_bb_mix(X, base_nb_mean, total_bb_RD, p_binom, taus, tumor_prop, tumor_weight=None):
n_states = p_binom.shape[0]
n_obs, n_comp, n_spots = X.shape

assert base_nb_mean.shape == (n_obs, n_spots)
assert tumor_prop.shape == (n_obs, n_spots)
assert p_binom.shape == (n_states, n_spots)

log_emission_baf = np.zeros((n_states, n_obs, n_spots))

if tumor_weight is None:
tumor_weight = tumor_prop

mix_p_A = p_binom[:, None, :] * tumor_weight + 0.5 * (1. - tumor_weight)
mix_p_B = (1. - p_binom)[:, None, :] * tumor_weight + 0.5 * (1. - tumor_weight)

aa = mix_p_A * taus[:, None, :]
bb = mix_p_B * taus[:, None, :]

kk = np.tile(X[:, 1, :], (n_states, 1, 1))
nn = np.tile(total_bb_RD[:, :], (n_states, 1, 1))

idx = np.tile(total_bb_RD > 0., (n_states, 1, 1))

log_emission_baf[idx] = scipy.stats.betabinom.logpmf(kk[idx], nn[idx], aa[idx], bb[idx])
return log_emission_baf

@staticmethod
@line_profiler.profile
def compute_emission_probability_nb_betabinom_mix(X, base_nb_mean, log_mu, alphas, total_bb_RD, p_binom, taus, tumor_prop, **kwargs):
"""
Attributes
----------
X : array, shape (n_observations, n_components, n_spots)
Observed expression UMI count and allele frequency UMI count.

base_nb_mean : array, shape (n_observations, n_spots)
Mean expression under diploid state.

log_mu : array, shape (n_states, n_spots)
Log of read depth change due to CNV. Mean of NB distributions in HMM per state per spot.

alphas : array, shape (n_states, n_spots)
Over-dispersion of NB distributions in HMM per state per spot.

total_bb_RD : array, shape (n_observations, n_spots)
SNP-covering reads for both REF and ALT across genes along genome.

p_binom : array, shape (n_states, n_spots)
BAF due to CNV. Mean of Beta Binomial distribution in HMM per state per spot.

taus : array, shape (n_states, n_spots)
Over-dispersion of Beta Binomial distribution in HMM per state per spot.

tumor_prop: array, shape (n_obs, n_spots)
Tumor proportion

Returns
----------
log_emission : array, shape (n_states, n_obs, n_spots)
Log emission probability for each gene each spot (or sample) under each state. There is a common bag of states across all spots.
"""
n_states = log_mu.shape[0]
n_obs, n_comp, n_spots = X.shape

# NB (n_states, n_obs, n_spots) == (7, 4248, 1)
log_emission_rdr = np.zeros(shape=(n_states, n_obs, n_spots), dtype=float)

assert base_nb_mean.shape == (n_obs, n_spots)
assert tumor_prop.shape == (n_obs, n_spots)
assert log_mu.shape == (n_states, n_spots)

# NB nb_mean, nb_std: (segments, spots) * (states, spots) = (states, segments, spots) == (7, 4248, 1)
nb_mean = base_nb_mean[None, :, :] * (tumor_prop[None, :, :] * np.exp(log_mu)[:, None, :] + 1. - tumor_prop[None, :, :])
nb_var = nb_mean + alphas[:, None, :] * nb_mean**2

kk = np.tile(X[:, 0, :], (n_states, 1, 1))
nn, pp = convert_params_var(nb_mean, nb_var)

idx = np.tile(base_nb_mean > 0., (n_states, 1, 1))

# log_emission_rdr[idx] = scipy.stats.nbinom.logpmf(kk[idx], nn[idx], pp[idx])
log_emission_rdr[idx] = thread_nbinom(kk[idx], nn[idx], pp[idx])

if ("logmu_shift" in kwargs) and ("sample_length" in kwargs):
sample_lengths = kwargs["sample_length"]
logmu_shift = kwargs["logmu_shift"]

# TODO HACK ask Cong.
logmu_shift = np.tile(logmu_shift, (1, n_spots))

# NB see ../src/calicost/utils_tumor.py
tumor_weight = get_tumor_weight(sample_lengths, tumor_prop, log_mu, logmu_shift)
else:
tumor_weight = np.tile(tumor_prop, (n_states, 1, 1))

# NB initialize log_emission
log_emission_baf = np.zeros((n_states, n_obs, n_spots))

mix_p_A = p_binom[:, None, :] * tumor_weight + 0.5 * (1. - tumor_weight)
mix_p_B = (1. - p_binom)[:, None, :] * tumor_weight + 0.5 * (1. - tumor_weight)

aa = mix_p_A * taus[:, None, :]
bb = mix_p_B * taus[:, None, :]

kk = np.tile(X[:, 1, :], (n_states, 1, 1))
nn = np.tile(total_bb_RD[:, :], (n_states, 1, 1))

idx = np.tile(total_bb_RD > 0., (n_states, 1, 1))

# log_emission_baf[idx] = scipy.stats.betabinom.logpmf(kk[idx], nn[idx], aa[idx], bb[idx])
log_emission_baf[idx] = thread_betabinom(kk[idx], nn[idx], aa[idx], bb[idx])

return log_emission_rdr, log_emission_baf

@staticmethod
@njit
def forward_lattice(lengths, log_transmat, log_startprob, log_emission, log_sitewise_transmat):
Expand Down
Loading