-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenerators2d.py
224 lines (185 loc) · 7.94 KB
/
generators2d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import random

import numpy as np
import sklearn
import sklearn.datasets  # `import sklearn` alone does not load the datasets submodule
def generate_uniform_around_centers(centers, variance):
    """Sample one 2-D point uniformly around a randomly chosen center.

    A center is picked uniformly at random from ``centers``; independent
    uniform noise drawn from ``[-variance, variance]`` is then added to
    each of its two coordinates.

    :param centers: Sequence of 2-D cluster centers.
    :param variance: Half-width of the uniform noise box around the center.
    :return: A length-2 array: the perturbed point.
    """
    chosen = centers[np.random.choice(len(centers))]
    noise = variance * np.random.uniform(-1, 1, 2)
    return chosen + noise
def generate_cross(centers, variance):
    """Sample one 2-D point on a diagonal cross around a random center.

    An offset ``x`` is drawn uniformly from ``[-variance, variance]`` and
    paired with ``y = +x`` or ``y = -x`` with equal probability, so the
    resulting ``[x, y]`` displacement lies on one of the two diagonals.
    The displacement is added to a center chosen uniformly at random.

    :param centers: Sequence of 2-D center points (NumPy array of rows).
    :param variance: Maximum absolute offset along each coordinate.
    :return: A length-2 array: the sampled point.
    """
    offset = variance * np.random.uniform(-1, 1)
    sign = 1 if np.random.randint(2) == 1 else -1
    center = centers[np.random.choice(len(centers))]
    return center + [offset, sign * offset]
def sample_data(dataset, batch_size, scale, var):
    """Build an infinite generator of 2-D toy-dataset batches.

    Each ``next()`` yields a ``float32`` array of shape
    ``(batch_size, 2)`` drawn from the named toy distribution.

    :param dataset: Name of the distribution: "25gaussians", "swissroll",
        "8gaussians", "checker_board_five", "checker_board_four",
        "simpleGaussian", "unif_square", "simpletranslatedGaussian",
        "simpletranslated_scaled_Gaussian", "circle-S1", "semi-circle-S1",
        "checker_board_five_cross", or "checker_board_five_expanded".
        An unrecognized name yields a generator that produces nothing.
    :param batch_size: Number of points per yielded batch.
    :param scale: Scaling factor for the cluster centers / circle radius
        (ignored by datasets that don't use it).
    :param var: Spread parameter: Gaussian std-dev or uniform half-width,
        depending on the dataset (ignored by datasets that don't use it).
    :return: A generator yielding ``np.ndarray`` batches of shape
        ``(batch_size, 2)``.
    """
    if dataset == "25gaussians":
        # 5x5 grid of Gaussians (std 0.05) on {-4,-2,0,2,4}^2, pre-sampled
        # and shuffled once, then replayed batch by batch forever.
        points = []
        for _ in range(100000 // 25):
            for x in range(-2, 3):
                for y in range(-2, 3):
                    point = np.random.randn(2) * 0.05
                    point[0] += 2 * x
                    point[1] += 2 * y
                    points.append(point)
        points = np.array(points, dtype="float32")
        np.random.shuffle(points)
        # points /= 2.828  # stdev
        while True:
            # BUG FIX: use floor division — `len(points) / batch_size` is a
            # float in Python 3 and range() would raise TypeError.
            for i in range(len(points) // batch_size):
                yield points[i * batch_size: (i + 1) * batch_size]
    elif dataset == "swissroll":
        # Classic swiss roll projected onto its (x, z) plane.
        while True:
            data = sklearn.datasets.make_swiss_roll(n_samples=batch_size, noise=0.25)[0]
            data = data.astype("float32")[:, [0, 2]]
            # data /= 7.5  # stdev plus a little
            yield data
    elif dataset == "8gaussians":
        # Eight Gaussians evenly spaced on a circle of radius `scale`,
        # each with std-dev `var`.
        centers = [
            (1, 0),
            (-1, 0),
            (0, 1),
            (0, -1),
            (1.0 / np.sqrt(2), 1.0 / np.sqrt(2)),
            (1.0 / np.sqrt(2), -1.0 / np.sqrt(2)),
            (-1.0 / np.sqrt(2), 1.0 / np.sqrt(2)),
            (-1.0 / np.sqrt(2), -1.0 / np.sqrt(2)),
        ]
        centers = [(scale * x, scale * y) for x, y in centers]
        while True:
            batch = []
            for _ in range(batch_size):
                point = np.random.randn(2) * var
                center = random.choice(centers)
                point[0] += center[0]
                point[1] += center[1]
                batch.append(point)
            batch = np.array(batch, dtype="float32")
            # batch /= 1.414  # stdev
            yield batch
    elif dataset == "checker_board_five":
        # Uniform squares (half-width `var`) around the center and the
        # four corners of a `scale`-sized board.
        centers = scale * np.array([[0, 0], [1, 1], [-1, 1], [-1, -1], [1, -1]])
        while True:
            batch = [generate_uniform_around_centers(centers, var) for _ in range(batch_size)]
            yield np.array(batch, dtype="float32")
    elif dataset == "checker_board_four":
        # Uniform squares around the four edge midpoints of the board.
        centers = scale * np.array([[1, 0], [0, 1], [-1, 0], [0, -1]])
        while True:
            batch = [generate_uniform_around_centers(centers, var) for _ in range(batch_size)]
            yield np.array(batch, dtype="float32")
    elif dataset == "simpleGaussian":
        # Standard 2-D normal.
        while True:
            yield np.random.randn(batch_size, 2).astype("float32")
    elif dataset == "unif_square":
        # Uniform on the square [-var, var]^2.
        while True:
            yield np.random.uniform(-var, var, (batch_size, 2)).astype("float32")
    elif dataset == "simpletranslatedGaussian":
        # Unit Gaussian translated to (scale, scale).
        while True:
            batch = scale * np.array([1.0, 1.0]) + np.random.randn(batch_size, 2)
            yield batch.astype("float32")
    elif dataset == "simpletranslated_scaled_Gaussian":
        # Gaussian with std `var`, translated to (scale, scale).
        while True:
            batch = scale * np.array([1.0, 1.0]) + var * np.random.randn(batch_size, 2)
            yield batch.astype("float32")
    elif dataset == "circle-S1":
        # Uniform on the full circle of radius `scale`.
        while True:
            angles = np.random.rand(batch_size) * 2 * np.pi
            batch = scale * np.stack([np.cos(angles), np.sin(angles)], axis=1)
            yield batch.astype("float32")
    elif dataset == "semi-circle-S1":
        # Uniform on the upper half-circle of radius `scale`.
        while True:
            angles = np.random.rand(batch_size) * np.pi
            batch = scale * np.stack([np.cos(angles), np.sin(angles)], axis=1)
            yield batch.astype("float32")
    elif dataset == "checker_board_five_cross":
        # Diagonal crosses (half-width `var`) around the five board centers.
        centers = scale * np.array([[0, 0], [1, 1], [-1, 1], [-1, -1], [1, -1]])
        while True:
            batch = [generate_cross(centers, var) for _ in range(batch_size)]
            yield np.array(batch, dtype="float32")
    elif dataset == "checker_board_five_expanded":
        # Same as "checker_board_five" but with doubled spread.
        centers = scale * np.array([[0, 0], [1, 1], [-1, 1], [-1, -1], [1, -1]])
        while True:
            batch = [generate_uniform_around_centers(centers, 2 * var) for _ in range(batch_size)]
            yield np.array(batch, dtype="float32")