-
Notifications
You must be signed in to change notification settings - Fork 229
/
Copy pathtest_similarity_learn.py
188 lines (149 loc) · 5.99 KB
/
test_similarity_learn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from metric_learn.oasis import OASIS, OASIS_Supervised
import numpy as np
from sklearn.utils import check_random_state
import pytest
from numpy.testing import assert_array_equal
from sklearn.datasets import load_iris
from sklearn.metrics import pairwise_distances
from test.test_utils import build_triplets
# Seed for the module-level random state below.
SEED = 33
# Single random state built from SEED; it is passed as ``random_state`` to
# several OASIS instances in the tests below.
# NOTE(review): presumably a stateful RandomState object, in which case the
# numbers each test draws may depend on test execution order -- confirm.
RNG = check_random_state(SEED)
def test_sanity_check():
    """
    Check that the score never decreases as OASIS sees more triplets.

    The model starts from the identity initialisation, is fitted on the
    first triplet and is then updated with ``partial_fit`` on further
    slices of the triplet set. After every update, ``score`` evaluated on
    the full set must be greater than or equal to the previous score.

    The updates are expected to emit a division-by-zero RuntimeWarning;
    see test_divide_zero for that case in isolation.
    """
    triplets = np.array([[[0, 1], [2, 1], [0, 0]],
                         [[2, 1], [0, 1], [2, 0]],
                         [[0, 0], [2, 0], [0, 1]],
                         [[2, 0], [0, 0], [2, 1]],
                         [[2, 1], [-1, -1], [33, 21]]])

    # Baseline: identity init, trained on 1/5 triplets.
    oasis = OASIS(n_iter=1, c=0.24, random_state=RNG, init='identity')
    oasis.fit(triplets[:1])
    a1 = oasis.score(triplets)

    # Only the stable prefix of the message is matched: NumPy words the
    # suffix differently between releases ("double_scalars" in older
    # versions, "scalar divide" in newer ones), so an exact string
    # comparison makes the test fail for reasons unrelated to OASIS.
    with pytest.warns(RuntimeWarning, match="divide by zero encountered"):
        # See 2/5 triplets
        oasis.partial_fit(triplets[1:2], n_iter=2)
        a2 = oasis.score(triplets)

        # See 4/5 triplets
        oasis.partial_fit(triplets[2:4], n_iter=3)
        a3 = oasis.score(triplets)

        # See 5/5 triplets, one is seen again
        oasis.partial_fit(triplets[4:5], n_iter=1)
        a4 = oasis.score(triplets)

    assert a2 >= a1
    assert a3 >= a2
    assert a4 >= a3
def test_score_zero():
    """
    Expect a ValueError from an OASIS model configured with ``n_iter=0``.

    The stated intent of this test is: the third triplet gives similarity
    0, so the raw prediction would be 0, yet predict() must only return
    values in {+1, -1}, i.e. a 0 prediction has to be forced to -1.

    NOTE(review): as written, construction, fit, predict and the final
    check all sit inside ``pytest.raises(ValueError)``, so the test passes
    as soon as any of them raises ValueError and the statements after the
    raising one never run. Confirm whether that is the intended contract.
    """
    triplets = np.array([[[0, 1], [2, 1], [0, 0]],
                         [[2, 1], [0, 1], [2, 0]],
                         [[0, 0], [2, 0], [0, 1]],
                         [[2, 0], [0, 0], [2, 1]]])

    with pytest.raises(ValueError):
        model = OASIS(n_iter=0, c=0.24, random_state=RNG)
        model.fit(triplets)
        predicted = model.predict(triplets)
        # Anything other than -1 or +1 is an invalid prediction.
        invalid = [label for label in predicted if label not in [-1, 1]]
        assert not invalid
def test_divide_zero():
    """
    Check that a division by zero during fit warns instead of failing.

    The third triplet will force norm(V_i) to be zero, and force a
    division by 0 when calculating tau = loss / norm(V_i).
    No error should be experienced. A RuntimeWarning should show up.
    """
    triplets = np.array([[[0, 1], [2, 1], [0, 0]],
                         [[2, 1], [0, 1], [2, 0]],
                         [[0, 0], [2, 0], [0, 1]],
                         [[2, 0], [0, 0], [2, 1]]])

    # ``init`` is left at the estimator's default here.
    oasis1 = OASIS(n_iter=20, c=0.24, random_state=RNG)

    # Only the stable prefix of the message is matched: NumPy words the
    # suffix differently between releases ("double_scalars" in older
    # versions, "scalar divide" in newer ones), so an exact string
    # comparison makes the test fail for reasons unrelated to OASIS.
    with pytest.warns(RuntimeWarning, match="divide by zero encountered"):
        oasis1.fit(triplets)
def class_separation(X, labels, callable_metric):
    """
    Return the average intra-class / inter-class distance ratio.

    For every distinct value in ``labels``, the mean pairwise distance
    among the rows of ``X`` carrying that label is divided by the mean
    pairwise distance between those rows and all remaining rows. The
    per-class ratios are then averaged over the classes.

    ``callable_metric`` is forwarded as ``metric`` to sklearn's
    ``pairwise_distances``.
    """
    classes, class_of_row = np.unique(labels, return_inverse=True)
    n_classes = len(classes)

    total = 0
    for class_idx in range(n_classes):
        in_class = class_of_row == class_idx
        members = X[in_class]
        others = X[~in_class]
        within = pairwise_distances(members, metric=callable_metric).mean()
        between = pairwise_distances(members, others,
                                     metric=callable_metric).mean()
        total += within / between
    return total / n_classes
def test_iris_supervised():
    """
    Real use case: OASIS_Supervised must improve class separation on Iris.

    Class separation is computed twice on the Iris dataset: once with a
    plain bilinear form using the identity matrix, and once with the
    metric returned by ``get_metric()`` after fitting OASIS_Supervised.
    The value after learning has to be strictly smaller.

    Unlike the Mahalanobis tests, we cannot call transform(X) and then use
    the euclidean metric, so an explicit callable is handed to sklearn's
    pairwise_distances (through class_separation) instead.
    """
    def identity_bilinear(u, v):
        # Negated bilinear form u^T I v, i.e. the baseline with M = Identity.
        eye = np.identity(np.shape(u)[0])
        return - np.dot(np.dot(u.T, eye), v)

    X, y = load_iris(return_X_y=True)
    separation_before = class_separation(X, y, identity_bilinear)

    model = OASIS_Supervised(random_state=33, c=0.38)
    model.fit(X, y)
    separation_after = class_separation(X, y, model.get_metric())

    # Values noted by the original author: -0.0407866 (after) vs 1.08 (before).
    assert separation_after < separation_before
@pytest.mark.parametrize('init', ['random', 'random_spd',
                                  'covariance', 'identity'])
@pytest.mark.parametrize('random_state', [33, 69, 112])
def test_random_state_in_suffling(init, random_state):
    """
    Check how ``random_state`` drives the shuffling of the triplets.

    Two OASIS instances built with the same random_state must expose the
    same ``indices`` after fit. Instances built with a different
    random_state must expose ``indices`` that differ from those of the
    previously fitted instance.

    The triplets come from build_triplets(), and the test is run for
    every parametrized ``init`` value.
    """
    triplets, _, _, _ = build_triplets()

    def fitted_indices(seed):
        # Fit a fresh model and return the shuffling it recorded.
        model = OASIS(random_state=seed, init=init)
        model.fit(triplets)
        return model.indices

    # Same random_state -> same shuffling.
    first = fitted_indices(random_state)
    second = fitted_indices(random_state)
    assert_array_equal(first, second)

    # Different random_state -> shuffling differs from the previous one.
    previous = second
    for offset in range(3, 5):
        current = fitted_indices(random_state+offset)
        with pytest.raises(AssertionError):
            assert_array_equal(previous, current)
        previous = current
@pytest.mark.parametrize('init', ['random', 'random_spd',
                                  'covariance', 'identity'])
@pytest.mark.parametrize('random_state', [33, 69, 112])
def test_general_results_random_state(init, random_state):
    """
    Check that fitting is reproducible for a fixed random_state.

    Two OASIS instances created with the same ``random_state`` and
    ``init`` and fitted on the same triplets must return equal matrices
    from ``get_bilinear_matrix()``.
    """
    triplets, _, _, _ = build_triplets()

    learned = []
    for _run in range(2):
        model = OASIS(random_state=random_state, init=init)
        model.fit(triplets)
        learned.append(model.get_bilinear_matrix())

    first_matrix, second_matrix = learned
    assert_array_equal(first_matrix, second_matrix)