-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreprocess.py
62 lines (48 loc) · 1.94 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
import numpy.matlib
import csv
from tqdm import *
from numpy import vstack,array
from numpy.random import rand
class Preprocessor(object):
    """Stateless linear preprocessing helpers: PCA, LDA and feature scaling.

    Every method returns the transformed data together with the parameters
    (projection matrix or per-feature scale) that produced it, so the same
    transform can be re-applied to other data by the caller.
    """

    @staticmethod
    def _top_eigenvectors(eig_vals, eig_vecs, count):
        """Return the ``count`` eigenvectors with largest |eigenvalue|, as rows.

        ``eig_vecs`` holds eigenvectors in its columns (the layout returned by
        ``np.linalg.eig``). Ties keep their original relative order.

        Raises:
            IndexError: if ``count`` is negative or exceeds the number of
                available eigenvectors.
        """
        if count < 0 or count > len(eig_vals):
            raise IndexError(
                "requested %d components but only %d are available"
                % (count, len(eig_vals))
            )
        # Negate so that a stable ascending sort yields descending magnitude.
        order = np.argsort(-np.abs(eig_vals), kind="stable")
        return eig_vecs[:, order[:count]].T

    def pca(self, X, k):
        """Project ``X`` onto its ``k`` leading principal components.

        Args:
            X: 2-D array with one sample per row and one feature per column.
            k: number of principal components to keep.

        Returns:
            A ``(projected, components)`` tuple. ``components`` has one
            eigenvector of the feature covariance matrix per row, ordered by
            decreasing eigenvalue magnitude; ``projected`` is
            ``X.dot(components.T)``. Note that ``X`` itself is projected
            as given (it is not mean-centred here).

        Raises:
            IndexError: if ``k`` is negative or larger than the number of
                features.
        """
        # np.cov expects variables in rows, hence the transpose. atleast_2d
        # keeps the single-feature case (0-d covariance) working.
        cov_mat = np.atleast_2d(np.cov(X.T))
        eig_vals, eig_vecs = np.linalg.eig(cov_mat)
        components = self._top_eigenvectors(eig_vals, eig_vecs, k)
        return X.dot(components.T), components

    def lda(self, X, T, d):
        """Project ``X`` onto its ``d`` leading linear discriminants.

        Args:
            X: 2-D array with one sample per row and one feature per column.
            T: 2-D array with one row per sample and one column per class;
                the class of a sample is the ``argmax`` of its row
                (e.g. one-hot targets).
            d: number of discriminant directions to keep.

        Returns:
            A ``(projected, directions)`` tuple. ``directions`` is a
            ``float32`` array with one eigenvector of ``inv(S_W).dot(S_B)``
            per row, ordered by decreasing eigenvalue magnitude;
            ``projected`` is ``X.dot(directions.T)``.

        Raises:
            IndexError: if ``d`` is negative or larger than the number of
                features.
            numpy.linalg.LinAlgError: if the within-class scatter matrix is
                singular.
        """
        n_features = X.shape[1]
        n_classes = T.shape[1]
        labels = T.argmax(axis=1)
        overall_mean = np.mean(X, axis=0)

        S_W = np.zeros((n_features, n_features))  # within-class scatter
        S_B = np.zeros((n_features, n_features))  # between-class scatter
        for k in range(n_classes):
            x_k = X[labels == k]
            if len(x_k) == 0:
                # A class without samples has no mean; skipping it avoids
                # injecting NaN into the scatter matrices.
                continue
            class_mean = x_k.mean(axis=0)
            centered = x_k - class_mean
            S_W += centered.T.dot(centered)
            offset = class_mean - overall_mean
            # Outer product: each class contributes a rank-one matrix.
            S_B += len(x_k) * np.outer(offset, offset)

        eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_W).dot(S_B))
        directions = self._top_eigenvectors(eig_vals, eig_vecs, d)
        # eig on a non-symmetric matrix may return a complex dtype; keep the
        # real part explicitly before the float32 cast.
        directions = np.real(directions).astype(np.float32)
        return X.dot(directions.T), directions

    def normalize(self, X):
        """Scale every feature of ``X`` by its standard deviation.

        Args:
            X: 2-D array with one sample per row and one feature per column.

        Returns:
            A ``(scaled, std_dev)`` tuple where ``std_dev`` is the per-column
            standard deviation (``np.std`` along axis 0) and ``scaled`` is
            ``X / std_dev``. The data is not mean-centred. A column with zero
            standard deviation is divided by zero, so its scaled values are
            ``inf``/``nan``.
        """
        std_dev = np.std(X, axis=0)
        return X / std_dev, std_dev