import numpy as np
import astropy.stats as ast
import numpy.polynomial.polynomial as pol
import numpy.random as rnd


def binerror(binned, cont, line, reps=1000, feats=10):
    """Monte-Carlo error estimate of the line features in each bin.

    For every bin holding more than 30 spectra, draws `reps` normal
    realisations of the binned spectrum, re-measures the line features on
    each realisation and returns the 1- and 2-sigma percentile bounds
    (4 values per feature) for each selected bin.
    """
    # Bin centres: midpoints between consecutive bin edges
    bincent = np.convolve(binned.bins, np.array([0.5, 0.5]), mode="valid")
    # Only bins with enough spectra for a meaningful error estimate
    sel, = np.where(binned.counts > 30)
    sorting = np.digitize(cont, binned.bins[:-1])
    result = np.zeros((len(sel), 4*feats))
    var = np.zeros((len(sel), len(line.idx)))
    for i, idx in enumerate(sel):
        subs = binned.block[sorting == (idx+1), :]
        subs = subs[:, line.idx]
        # Standard error of the mean at every wavelength point of the bin
        var[i, :] = subs.std(axis=0)/np.sqrt(subs.shape[0])
    con = np.ones(reps)
    for binNr, bn in enumerate(sel):
        # `bn` indexes the full bin arrays, `binNr` the >30-count selection
        mcblock = rnd.normal(loc=binned.binned[bn, line.idx],
                             scale=np.sqrt(var[binNr, :]/binned.counts[bn]),
                             size=(reps, len(line.idx)))
        mes = line.measure_on_block(binned.group, mcblock, con*bincent[bn])
        print("")
        print("For bin {:.4f} with {} spectra".format(bincent[bn], binned.counts[bn]))
        for i in range(feats):
            # The percentile levels are the 1- and 2-sigma points of the
            # normal distribution, in the order [-2s, -1s, +1s, +2s]
            result[binNr, 4*i:4*i+4] = np.percentile(mes[:, i],
                                                     [2.28, 15.87, 84.13, 97.72])
    return result
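
# Hypothetical usage sketch (the object names are assumed, not defined in this
# file): given a binspec instance `bsp` built from a framegroup, its continuum
# levels `cont` and a `line` object exposing `idx` and `measure_on_block`,
#
#   errs = binerror(bsp, cont, line, reps=1000, feats=10)
#
# returns one row per well-populated bin with the [2.28, 15.87, 84.13, 97.72]
# percentiles of every feature, i.e. the normal CDF evaluated at -2, -1, +1
# and +2 sigma (scipy.stats.norm.cdf(-2) ~ 0.0228, and so on).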


class binspec(object):
    def __init__(self, group, cont, cond, method='blocks'):
        """
        Averages spectra in [frame]group based on cond[ition].
        If cond has dtype bool it makes two bins;
        otherwise it histograms cond (using Bayesian blocks by default,
        see `method`) and finds the average spectrum per bin.
        cont[inuum] - continuum level for each row in the framegroup
        """
        self.group = group
        block = np.vstack([frm.data for frm in group.frames])
        self.block = block
        if cond.dtype == np.dtype('bool'):
            idx, = np.where(cond)
            nidx, = np.where(~cond)
            binned = np.mean(block[idx, :], axis=0)
            con = np.mean(cont[idx])
            self.binned = np.vstack((binned, np.mean(block[nidx, :], axis=0)))
            self.con = np.vstack((con, np.mean(cont[nidx])))
            self.bins = np.array([0, 0, 1])
            # (~cond).sum() -- the parentheses matter: ~cond.sum() would
            # bit-invert the integer sum instead of counting False entries
            self.counts = np.array([cond.sum(), (~cond).sum()])
        else:
            if len(cond) != block.shape[0]:
                raise IndexError("Length of cond does not match number of rows in framegroup")
            counts, bins = ast.histogram(cond, bins=method)
            sorting = np.digitize(cond, bins[:-1])  # :-1 to get the correct number of buckets from digitize
            binned = np.mean(block[sorting == 1, :], axis=0)
            con = np.mean(cont[sorting == 1])
            for i in np.unique(sorting)[1:]:
                binned = np.vstack((binned, np.mean(block[sorting == i, :], axis=0)))
                con = np.vstack((con, np.mean(cont[sorting == i])))
            self.binned = binned
            self.con = con
            self.counts = counts
            self.bins = bins
            self.sorting = sorting
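
# Minimal sketch of the binning scheme above on synthetic data (illustrative
# only, not the author's pipeline): astropy's Bayesian-blocks histogram picks
# adaptive bin edges, and digitizing against bins[:-1] yields 1-based bin ids
# whose populations match the histogram counts.
#
#   x = rnd.normal(size=500)
#   counts, bins = ast.histogram(x, bins='blocks')
#   sorting = np.digitize(x, bins[:-1])   # ids run from 1 to len(counts)
#   assert all(counts[i-1] == (sorting == i).sum() for i in np.unique(sorting))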


class binned_framegroup(object):
    def __init__(self, line, group, bin_quant, cuts=None):
        self.idx = line.idx
        self.cent = line.cent
        self.group = group
        self.data, self.cont, self.bins, self.counts = self.__bin_by_quant(bin_quant, cuts)

    def __bin_by_quant(self, quant, cuts):
        # Normalise `cuts` to something usable for row selection: row indices
        # when a mask is given, an all-True mask otherwise
        if cuts is not None:
            cuts, = np.where(cuts.reshape(-1))
        else:
            cuts = np.ones(len(quant), dtype=bool)
        datablock = self.group.frames[0].data[:, self.idx]
        contblock = self.group.frames[0].cont.val(self.cent)
        for frm in self.group.frames[1:]:
            datablock = np.concatenate((datablock, frm.data[:, self.idx]), axis=0)
            contblock = np.concatenate((contblock, frm.cont.val(self.cent)), axis=0)
        datablock = datablock[cuts, :]
        contblock = contblock[cuts]
        counts, bins = ast.histogram(quant[cuts], bins='blocks')
        sorting = np.digitize(quant[cuts], bins[:-1])  # :-1 to get the correct number of buckets from digitize
        binned = np.mean(datablock[sorting == 1, :], axis=0)
        con = np.zeros(len(counts))
        con[0] = np.mean(contblock[sorting == 1])
        for i in np.unique(sorting)[1:]:
            binned = np.vstack((binned, np.mean(datablock[sorting == i, :], axis=0)))
            con[i-1] = np.mean(contblock[sorting == i])
        return binned, con, bins, counts

    def partition_data(self, data):
        return np.digitize(data, self.bins[:-1])  # :-1 to get the correct number of buckets from digitize

    def measure(self):
        """Measure line features for every binned spectrum.

        Returns velocity, line-core intensity, fit error, equivalent width
        and the profile moments (mean, variance, skewness, excess kurtosis).
        """
        nfram = len(self.data)
        guess = self.group.ref[self.idx].argmin()
        width = int(len(self.idx)*0.16)  # Min fraction of points to be used in fit
        if width % 2 == 0:
            width += 1  # Keep the fit window odd so it centres on `guess`
        dwn = int((width - 1)/2)
        up = dwn + 1
        bottom = guess + np.arange(-dwn, up)
        test = guess + np.arange(-(dwn+1), (up+1))
        vel, bot, err = self.__linfit(bottom, test)
        ew = self.__equivalent_width(nfram)
        mn, var, ske, kur = self.__moments()
        return (vel, bot, err, ew, mn, var, ske, kur)

    def __linfit(self, bottom, test):
        cv = 299792.458  # Speed of light [km/s]
        # Parabola through the line core; its vertex -b/(2a) gives the
        # wavelength of minimum intensity for every spectrum at once
        fit = pol.polyfit(self.group.lmbd[self.idx[bottom]], self.data[:, bottom].T, 2)
        a, b = fit[2, :], fit[1, :]
        lmin = -b/(2*a)
        bot = pol.polyval(lmin, fit, tensor=False)
        pred = pol.polyval(self.group.lmbd[test], fit)
        # Doppler velocity of the fitted minimum relative to the rest wavelength
        vel = cv*(lmin - self.cent)/self.cent
        # Error of fit with an extra term that penalizes fits which get
        # wildly off centre, with extra weight so it *hurts*
        err = np.sqrt(np.mean((self.data[:, test] - pred)**2, axis=1)
                      + 2*(lmin - self.cent)**2)
        return vel, bot, err
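
    # Sketch of the vertex trick above on one synthetic profile (values are
    # illustrative only): fitting y = c0 + c1*x + c2*x**2 and evaluating
    # x_min = -c1/(2*c2) recovers the location of a parabolic minimum.
    #
    #   x = np.linspace(-1, 1, 7)
    #   y = 0.2 + (x - 0.1)**2          # minimum at x = 0.1
    #   c = pol.polyfit(x, y, 2)
    #   x_min = -c[1]/(2*c[2])          # -> 0.1 up to round-off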

    def __equivalent_width(self, nrows):
        # Wavelength step per pixel across the line window
        dlam = np.diff(self.group.lmbd[slice(self.idx[0]-1, self.idx[-1]+1)]
                       ).reshape((-1, 1))*np.ones(nrows)
        return ((self.data - 1)*dlam.T).sum(axis=1)*1e3  # milliångström
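
    # Worked check of the sum above (toy numbers, assuming a continuum-
    # normalized profile and wavelengths in ångström): a flat depression of
    # 0.1 spanning a 2 Å window gives (data-1)*dlam summed = -0.1 * 2 Å,
    # i.e. an equivalent width of -200 mÅ after the 1e3 conversion.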

    def __moments(self):
        x = self.group.lmbd[self.idx]
        # Turn the line depression into a pseudo-PDF over wavelength
        dpdf = 1 - self.data/self.data.max(axis=1).reshape(-1, 1)
        dpdf = dpdf/dpdf.sum(axis=1).reshape(-1, 1)
        mu = np.sum(dpdf*x, axis=1).reshape(-1, 1)  # Reshaping enables broadcasting
        mu2 = np.sum(dpdf*(x - mu)**2, axis=1)
        mu3 = np.sum(dpdf*(x - mu)**3, axis=1)
        mu4 = np.sum(dpdf*(x - mu)**4, axis=1)
        mu = mu.reshape(-1)  # Undoing reshape to allow assignment
        skew = mu3/mu2**(3/2)
        kurt = mu4/mu2**2 - 3  # Excess kurtosis
        return mu, mu2, skew, kurt
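
    # Toy check of the pseudo-PDF moments (illustrative, not part of the
    # pipeline): a symmetric absorption line should give a mean at the line
    # centre and vanishing skewness.
    #
    #   x = np.linspace(-1, 1, 101)
    #   prof = 1 - 0.5*np.exp(-x**2/0.1)    # symmetric line, centre at 0
    #   dpdf = 1 - prof/prof.max()
    #   dpdf = dpdf/dpdf.sum()
    #   mu = (dpdf*x).sum()                 # ~0
    #   skew = (dpdf*(x - mu)**3).sum()/(dpdf*(x - mu)**2).sum()**1.5   # ~0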