Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Main logged verbose abc tweaks #22

Draft
wants to merge 8 commits into
base: main_logged_verbose_abc
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion src/calicost/utils_IO.py
Original file line number Diff line number Diff line change
Expand Up @@ -1531,7 +1531,7 @@ def bin_selection_basedon_normal(
model = Weighted_BetaBinom(
tmpX, np.ones(len(tmpX)), weights=np.ones(len(tmpX)), exposure=tmptotal_bb_RD
)
tmpres = model.fit(disp=0)
tmpres = model.fit()
tmpres.params[0] = 0.5
tmpres.params[-1] = max(tmpres.params[-1], min_betabinom_tau)
# remove bins if normal B allele frequencies fall out of 5%-95% probability range
Expand Down
56 changes: 53 additions & 3 deletions src/calicost/utils_distribution_fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,10 @@ def __callback__(self, params):
def fit(
self,
start_params=None,
maxiter=10_000,
maxiter=1_500,
maxfun=5_000,
xtol=1.e-2,
ftol=1.e-2,
write_chain=True,
**kwargs,
):
Expand All @@ -157,8 +159,7 @@ def fit(
)

start = time.time()

# NB kwargs = {'xtol': 0.0001, 'ftol': 0.0001, disp: False}

kwargs.pop("disp", None)

tmp_path = f"{self.__class__.__name__.lower()}_chain.tmp"
Expand All @@ -181,6 +182,8 @@ def fit(
full_output=True,
retall=True,
disp=False,
xtol=xtol,
ftol=ftol,
**kwargs,
)

Expand Down Expand Up @@ -423,3 +426,50 @@ def __post_init__(self):
assert self.tumor_prop is not None, "Tumor proportion must be defined."

Weighted_BetaBinom_fixdispersion_mix.ninstance += 1


class BAF_Binom(GenericLikelihoodModel):
    """
    Weighted binomial model: endog ~ Binom(exposure, p), where
    p = scaling / (1 + exp(-(exog @ params) + offset)),
    i.e. a sigmoid of the linear predictor, capped at `scaling`.

    Fits params by maximizing the weighted log-likelihood:
    max_{params} \sum_{s} weights_s * log P(endog_s | exog_s; params)

    Used to estimate tumor proportion in utils_hmrf.

    Attributes
    ----------
    endog : array, (n_samples,)
        Y values (observed B-allele counts).

    exog : array, (n_samples, n_features)
        Design matrix.

    weights : array, (n_samples,)
        Sample weights applied to each observation's log-likelihood.

    exposure : array, (n_samples,)
        Total number of trials. In BAF case, this is the total number of SNP-covering UMIs.

    offset : float or array, (n_samples,)
        Constant subtracted from the linear predictor inside the sigmoid.

    scaling : float
        Multiplicative cap on the success probability, p in (0, scaling).
    """
    def __init__(self, endog, exog, weights, exposure, offset, scaling, **kwds):
        super().__init__(endog, exog, **kwds)

        self.weights = weights
        self.exposure = exposure
        self.offset = offset
        self.scaling = scaling

    def nloglikeobs(self, params):
        """Return the negative weighted binomial log-likelihood at `params`."""
        linear_term = self.exog @ params
        # p = scaling * sigmoid(linear_term - offset)
        p = self.scaling / (1 + np.exp(-linear_term + self.offset))
        llf = scipy.stats.binom.logpmf(self.endog, self.exposure, p)
        neg_sum_llf = -llf.dot(self.weights)
        return neg_sum_llf

    def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
        """Fit via the generic likelihood optimizer.

        If `start_params` is not given, use `self.start_params` when present,
        otherwise a uniform vector of 0.5 / n_features for every parameter.
        """
        if start_params is None:
            if hasattr(self, "start_params"):
                start_params = self.start_params
            else:
                # NB exog.shape[1] is a scalar; no reduction needed.
                start_params = 0.5 / self.exog.shape[1] * np.ones(self.nparams)
        return super().fit(
            start_params=start_params, maxiter=maxiter, maxfun=maxfun, **kwds
        )
Loading