Skip to content

Commit

Permalink
fix for issue #8
Browse files Browse the repository at this point in the history
  • Loading branch information
Rebecca G. Elyanow committed Dec 7, 2017
1 parent a14012d commit 15eba96
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 30 deletions.
41 changes: 16 additions & 25 deletions src/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,38 @@
import numpy as np
import mpmath


class negBin(object):
def __init__(self, p = 0.1, r = 10):
nbin_mpmath = lambda k, p, r: mpmath.gamma(k + r)/(mpmath.gamma(k+1)\
*mpmath.gamma(r))*np.power(1-p, r)*np.power(p, k)
self.nbin = np.frompyfunc(nbin_mpmath, 3, 1)
self.p = p
self.r = r

def mleFun(self, par, data, sm):
p = par[0]
r = par[1]
n = len(data)
try:
f0 = sm/float(r+sm)-p
except ValueError:
print("r+sm=0")
print(data)
f1 = np.sum(special.psi(data+r)) - n*special.psi(r) + n*np.log(r/float(r+sm))
f0 = sm/(r+sm)-p
f1 = np.sum(special.psi(data+r)) - n*special.psi(r) + n*np.log(r/(r+sm))
return np.array([f0, f1])

def fit(self, data, p = None, r = None):
if p is None or r is None:
av = np.average(data)
va = np.var(data)
r = (av*av)/float(va-av)
p = (va-av)/float(va)
sm = np.sum(data)/(len(data))
if DEBUG:
print(av,va,r,p,sm)
if False and not va or not (va-av) or not (r+sm):
return 0
r = (av*av)/(va-av)
p = (va-av)/(va)
sm = np.sum(data)/len(data)
x = optimize.fsolve(self.mleFun, np.array([p, r]), args=(data, sm))
self.p = x[0]
self.r = x[1]
return 1


def pdf(self, k):
return self.nbin(k, self.p, self.r)
return self.nbin(k, self.p, self.r).astype('float64')



def plot_distribution(p,distr,xlab,ylab,title):
fname = '_'.join(title.split(' '))
Expand Down Expand Up @@ -119,22 +113,19 @@ def get_distributions(reads_by_LR):
for barcode,barcode_LRS in iteritems(LRs_by_barcode):
if len(barcode_LRS) > 1:
get_overlap(barcode_LRS)
if len(LRs) == 0:
if len(LRs) < 100:
return None,None,None
elif len(LRs) < 100:
print('WARNING: Few linked-reads detected')
p_rate = get_rate_distr(LRs)
p_len = get_length_distr(LRs)
return p_len,p_rate,barcode_overlap

def get_length_distr(LRs):
lengths = [x[2]-x[1] for x in LRs]
lengths.sort()
if len(lengths) > 10:
lengths = lengths[int(len(lengths)/10):int(len(lengths)/10*9)]
assert len(lengths)>100
assert np.var(lengths) != 0
b = negBin()
if not b.fit(lengths):
return None
b.fit(lengths)
p = b.pdf
pp = lambda x: max(1e-20,float(p([x])[0]))
## poisson distribution
Expand Down
6 changes: 3 additions & 3 deletions src/get_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ def make_barcodeDict(chrom):
for read in iterator:
cov += read.query_alignment_length
## DEBUG
# if DEBUG and cov > 100000000:
# break
if DEBUG and cov > 100000000:
break
if pass_checks(read):
barcode = get_barcode(read)
peread = PERead(read)
Expand Down Expand Up @@ -150,7 +150,7 @@ def get_candidates(discs,reads_by_LR):
candidates = []
r = lmax
p_len,p_rate,barcode_overlap = get_distributions(reads_by_LR)
if p_len == None:
if p_len == None or p_rate == None:
return None,None,None
num_cands = 0
for key,items in iteritems(discs):
Expand Down
4 changes: 2 additions & 2 deletions src/global_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ def estimate_lmin_lmax():
std_dist = np.std(ls)
lmin = max(-int(np.mean(length)),int(mean_dist-std_dist*SD_MULT))
lmax = int(mean_dist+std_dist*SD_MULT)
return lmin,lmax
return lmin,max(100,lmax)

lmin,lmax = estimate_lmin_lmax()


if 'min_sv' in constants:
min_sv = int(constants['min_sv'])
else:
min_sv = lmax
min_sv = 2*lmax

if 'min_len' in constants:
min_len = int(constants['min_len'])
Expand Down

0 comments on commit 15eba96

Please sign in to comment.