-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmnist1d_utils.py
137 lines (113 loc) · 4.93 KB
/
mnist1d_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import numpy as np
import requests
import scipy
import pickle
from scipy.ndimage.filters import gaussian_filter
from scipy.interpolate import interp1d
def pad(x, padding):
    """Append a random number of zeros to the end of 1D signal x.

    The count is drawn uniformly from [low, high] where padding = (low, high).
    """
    low, high = padding
    n_zeros = low + int(np.random.rand() * (high - low + 1))
    return np.concatenate([x, np.zeros(n_zeros)])
def shear(x, scale=10):
    """Subtract a random linear ramp from x (slope ~ U(-scale/2, scale/2))."""
    slope = scale * (np.random.rand() - 0.5)
    ramp = np.linspace(-0.5, .5, len(x))
    return x - slope * ramp
def translate(x, max_translation):
    """Circularly shift x right by a random offset in [0, max_translation)."""
    shift = np.random.choice(max_translation)
    # np.roll(x, k) == concatenate([x[-k:], x[:-k]]) for all k >= 0,
    # including k == 0 (identity).
    return np.roll(x, shift)
def corr_noise_like(x, scale):
    """Spatially-correlated Gaussian noise with x's shape.

    White noise of std `scale` is smoothed with a Gaussian kernel (sigma=2)
    to introduce correlation between neighboring samples.
    """
    white = np.random.randn(*x.shape) * scale
    return gaussian_filter(white, 2)
def iid_noise_like(x, scale):
    """Independent Gaussian noise (std = scale) with the same shape as x."""
    return scale * np.random.randn(*x.shape)
def interpolate(x, N):
    """Linearly resample x onto N evenly spaced points over its domain."""
    old_grid = np.linspace(0, 1, len(x))
    new_grid = np.linspace(0, 1, N)
    resampler = interp1d(old_grid, x, axis=0, kind='linear')
    return resampler(new_grid)
def transform(x, y, args, eps=1e-8):
    """Apply the full MNIST-1D augmentation pipeline to one template.

    Pads, dilates, scales, translates, noises, shears, and subsamples the
    signal x (the time axis y gets only the length-changing steps so it stays
    aligned with x). Returns the transformed (x, y) pair at length
    args.final_seq_length. Uses np.random global state; eps keeps the padded
    signal nonzero so the noise mask below can tell signal from padding.
    """
    new_x = pad(x+eps, args.padding) # pad
    new_x = interpolate(new_x, args.template_len + args.padding[-1]) # dilate
    new_y = interpolate(y, args.template_len + args.padding[-1])
    new_x *= (1 + args.scale_coeff*(np.random.rand() - 0.5)) # scale
    new_x = translate(new_x, args.max_translation) #translate
    # add noise: correlated noise fills only the zero (padding) region,
    # iid noise is added everywhere
    mask = new_x != 0
    new_x = mask*new_x + (1-mask)*corr_noise_like(new_x, args.corr_noise_scale)
    new_x = new_x + iid_noise_like(new_x, args.iid_noise_scale)
    # shear and interpolate
    new_x = shear(new_x, args.shear_scale)
    new_x = interpolate(new_x, args.final_seq_length) # subsample
    new_y = interpolate(new_y, args.final_seq_length)
    return new_x, new_y
def get_dataset_args(as_dict=False):
    """Default hyperparameters for generating the MNIST-1D dataset.

    Returns a plain dict when as_dict is True, otherwise an ObjectView
    exposing the same keys as attributes.
    """
    defaults = {
        'num_samples': 5000,
        'train_split': 0.8,
        'template_len': 12,
        'padding': [36, 60],
        'scale_coeff': .4,
        'max_translation': 48,
        'corr_noise_scale': 0.25,
        'iid_noise_scale': 2e-2,
        'shear_scale': 0.75,
        'shuffle_seq': False,
        'final_seq_length': 40,
        'seed': 42,
        'url': 'https://github.com/greydanus/mnist1d/raw/master/mnist1d_data.pkl',
    }
    if as_dict:
        return defaults
    return ObjectView(defaults)
def to_pickle(thing, path):
    """Serialize `thing` to the file at `path` (pickle protocol 4)."""
    with open(path, 'wb') as f:
        pickle.dump(thing, f, protocol=4)
def from_pickle(path):
    """Load and return the pickled object stored at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)
class ObjectView(object):
    """Wrap a dict so its keys can be accessed as attributes."""

    def __init__(self, d):
        # Binding the dict as __dict__ makes every key an attribute.
        self.__dict__ = d
# basic 1D templates for the 10 digits
def get_templates():
    """Hand-drawn 1D templates for the ten digits.

    Each row is whitened (zero mean, unit std) then shifted so the first
    sample is zero; since every template begins and ends at the same raw
    value (5), both endpoints land at zero. Returns a dict with the
    template signals 'x' (10 x 12, scaled by 1/6), the time axis 't'
    (12 points, scaled by 1/6), and the labels 'y' (0..9).
    """
    raw = np.asarray([
        [5, 6, 6.5, 6.75, 7, 7, 7, 7, 6.75, 6.5, 6, 5],          # 0
        [5, 3, 3, 3.4, 3.8, 4.2, 4.6, 5, 5.4, 5.8, 5, 5],        # 1
        [5, 6, 6.5, 6.5, 6, 5.25, 4.75, 4, 3.5, 3.5, 4, 5],      # 2
        [5, 6, 6.5, 6.5, 6, 5, 5, 6, 6.5, 6.5, 6, 5],            # 3
        [5, 4.4, 3.8, 3.2, 2.6, 2.6, 5, 5, 5, 5, 5, 5],          # 4
        [5, 3, 3, 3, 3, 5, 6, 6.5, 6.5, 6, 4.5, 5],              # 5
        [5, 4, 3.5, 3.25, 3, 3, 3, 3, 3.25, 3.5, 4, 5],          # 6
        [5, 7, 7, 6.6, 6.2, 5.8, 5.4, 5, 4.6, 4.2, 5, 5],        # 7
        [5, 4, 3.5, 3.5, 4, 5, 5, 4, 3.5, 3.5, 4, 5],            # 8
        [5, 4, 3.5, 3.5, 4, 5, 5, 5, 5, 4.7, 4.3, 5],            # 9
    ])
    n_points = raw.shape[1]
    x = raw - raw.mean(1, keepdims=True)   # whiten
    x = x / x.std(1, keepdims=True)
    x = x - x[:, :1]                       # signal starts and ends at 0
    return {'x': x / 6.,
            't': np.linspace(-5, 5, n_points) / 6.,
            'y': np.arange(10)}
def make_dataset(args=None, template=None, ):
    """Generate the full MNIST-1D dataset by augmenting the digit templates.

    Draws args.num_samples // 10 examples per class via transform() (so the
    total may be slightly below num_samples when it isn't divisible by the
    number of classes), shuffles the batch, normalizes to zero mean / unit
    std, and splits into train/test by args.train_split. Returns a dict with
    'x', 'y', 'x_test', 'y_test', the (shared) time axis 't', and the raw
    'templates'. Reseeds np.random globally with args.seed.
    """
    templates = get_templates() if template is None else template
    args = get_dataset_args() if args is None else args
    np.random.seed(args.seed) # reproducibility
    xs, ys = [], []
    samples_per_class = args.num_samples // len(templates['y'])
    for label_ix in range(len(templates['y'])):
        for example_ix in range(samples_per_class):
            x = templates['x'][label_ix]
            t = templates['t']
            y = templates['y'][label_ix]
            x, new_t = transform(x, t, args) # new_t transformation is same each time
            xs.append(x) ; ys.append(y)
    batch_shuffle = np.random.permutation(len(ys)) # shuffle batch dimension
    xs = np.stack(xs)[batch_shuffle]
    ys = np.stack(ys)[batch_shuffle]
    if args.shuffle_seq: # maybe shuffle the spatial dimension
        seq_shuffle = np.random.permutation(args.final_seq_length)
        xs = xs[...,seq_shuffle]
    # NOTE(review): new_t (from the last loop iteration) is rescaled by the
    # PRE-normalization std of xs — statement order matters here.
    new_t = new_t/xs.std()
    xs = (xs-xs.mean())/xs.std() # center the dataset & set standard deviation to 1
    # train / test split
    split_ix = int(len(ys)*args.train_split)
    dataset = {'x': xs[:split_ix], 'x_test': xs[split_ix:],
               'y': ys[:split_ix], 'y_test': ys[split_ix:],
               't':new_t, 'templates': templates}
    return dataset