Skip to content

Commit 4ce6759

Browse files
committed
fix mfom tests: simulate multi-label dataset
1 parent 8272bcf commit 4ce6759

File tree

5 files changed

+71
-170
lines changed

5 files changed

+71
-170
lines changed

experiments/runner.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
# OGA-TS model runner
33

4-
export PYTHONPATH="/home/vano/wrkdir/projects/Python/attribute_detection/:$PYTHONPATH"
4+
export PYTHONPATH="`pwd`/:$PYTHONPATH"
55
source activate ai # activate conda environment
66

77
model=sed_ogits

src/model/objectives.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,12 @@ def mfom_eer_normalized(y_true, y_pred):
3333
# smooth false negative and false positive rates
3434
fnr = K.log(K.sum(fn, axis=0) + 1.) - K.log(P + 1.)
3535
fpr = K.log(K.sum(fp, axis=0) + 1.) - K.log(N + 1.)
36-
fnr = K.exp(fnr)
37-
fpr = K.exp(fpr)
38-
smooth_eer = fpr + .5 * K.abs(fnr - fpr)
3936
# debug output
4037
# fnr = K.print_tensor(K.exp(fnr), message="FNR is: ")
4138
# fpr = K.print_tensor(K.exp(fpr), message="FPR is: ")
39+
fnr = K.exp(fnr)
40+
fpr = K.exp(fpr)
41+
smooth_eer = fpr + .5 * K.abs(fnr - fpr)
4242
return K.mean(smooth_eer)
4343

4444

tests/model/test_mfom_2d.py

Lines changed: 64 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,83 +1,100 @@
1+
"""
2+
Simulate multi-label classification.
3+
"""
14
import numpy as np
25
import keras.backend as K
36
from keras.models import Model
47
from keras.layers import Dense, Activation, Input
8+
import keras.regularizers as regs
9+
import keras.constraints as constraints
510
import matplotlib.pyplot as plt
11+
from sklearn.datasets import make_multilabel_classification
12+
from sklearn.model_selection import train_test_split
13+
from sklearn import preprocessing
614
import src.model.mfom as mfom
715
import src.utils.metrics as MT
816
import src.model.objectives as obj
917

10-
np.random.seed(777)
18+
RANDOM_SEED = 777
19+
np.random.seed(RANDOM_SEED)
1120

1221

13-
def generate_dataset(output_dim=14, num_examples=10000):
14-
"""
15-
Summation of two binary numbers.
16-
Input is two binary numbers, stacked in one vector.
17-
Output is an integer number.
18-
"""
22+
def generate_dataset(n_smp=300, ratio=0.3, n_feat=2, n_cls=2):
23+
x, y = make_multilabel_classification(n_samples=n_smp, n_features=n_feat,
24+
n_classes=n_cls, n_labels=1,
25+
allow_unlabeled=False,
26+
random_state=RANDOM_SEED)
27+
scaler = preprocessing.StandardScaler()
28+
x = scaler.fit_transform(x)
29+
x_tr, x_tst, y_tr, y_tst = train_test_split(x, y, test_size=ratio, random_state=RANDOM_SEED)
30+
return x_tr, x_tst, y_tr, y_tst
1931

20-
def int2vec(x, dim=output_dim):
21-
out = np.zeros(dim)
22-
binrep = np.array(list(np.binary_repr(x))).astype('int')
23-
out[-len(binrep):] = binrep
24-
return out
2532

26-
x_left_int = (np.random.rand(num_examples) * 2 ** (output_dim - 1)).astype('int')
27-
x_right_int = (np.random.rand(num_examples) * 2 ** (output_dim - 1)).astype('int')
28-
y_int = x_left_int + x_right_int
29-
30-
x = list()
31-
for i in range(len(x_left_int)):
32-
x.append(np.concatenate((int2vec(x_left_int[i]), int2vec(x_right_int[i]))))
33-
34-
y = list()
35-
for i in range(len(y_int)):
36-
y.append(int2vec(y_int[i]))
37-
38-
x = np.array(x)
39-
y = np.array(y)
40-
return x, y
41-
42-
43-
if __name__ == '__main__':
44-
dim = 14
45-
nclass = 7
46-
47-
# Input block
48-
feat_input = Input(shape=(dim,), name='main_input')
33+
def mfom_model(in_dim, nclass):
34+
# input block
35+
feat_input = Input(shape=(in_dim,), name='main_input')
4936
# layer 1
50-
x = Dense(30, name='dense1')(feat_input)
37+
x = Dense(10, name='dense1')(feat_input)
5138
x = Activation(activation='sigmoid', name='act1')(x)
39+
# layer 2
40+
x = Dense(10, name='dense2')(x)
41+
x = Activation(activation='sigmoid', name='act2')(x)
5242
# output layer
5343
x = Dense(nclass, name='pre_activation')(x)
5444
y_pred = Activation(activation='sigmoid', name='output')(x)
5545

46+
# === MFoM head ===
5647
# misclassification layer, feed Y
5748
y_true = Input(shape=(nclass,), name='y_true')
5849
psi = mfom.UvZMisclassification(name='uvz_misclass')([y_true, y_pred])
5950

6051
# class Loss function layer
61-
out = mfom.SmoothErrorCounter(name='smooth_error_counter')(psi)
52+
# NOTE: you may want to add regularization or constraints
53+
out = mfom.SmoothErrorCounter(name='smooth_error_counter',
54+
# alpha_constraint=constraints.min_max_norm(min_value=-4., max_value=4.),
55+
# alpha_regularizer=regs.l1(0.001),
56+
# beta_constraint=constraints.min_max_norm(min_value=-4., max_value=4.),
57+
# beta_regularizer=regs.l1(0.001)
58+
)(psi)
6259

6360
# compile model
6461
model = Model(input=[y_true, feat_input], output=out)
65-
model.compile(loss=obj.mfom_eer_normalized, optimizer='Adam')
66-
model.summary()
62+
return model
6763

68-
# train
69-
X, Y = generate_dataset(output_dim=nclass)
70-
hist = model.fit([Y, X], Y, nb_epoch=100, batch_size=16)
7164

72-
# calc accuracy: we cut MFoM head, up to sigmoid output
65+
def cut_mfom(model):
66+
# calc accuracy: cut MFoM head, up to sigmoid output
7367
input = model.get_layer(name='main_input').output
7468
out = model.get_layer(name='output').output
75-
cut_model = Model(input=input, output=out)
76-
y_pred = cut_model.predict(X)
77-
eer_val = MT.eer(y_true=Y.flatten(), y_pred=y_pred.flatten())
69+
cut_net = Model(input=input, output=out)
70+
return cut_net
71+
72+
73+
if __name__ == '__main__':
74+
dim = 20
75+
nclass = 10
76+
77+
# mfom model
78+
model = mfom_model(dim, nclass)
79+
model.compile(loss=obj.mfom_eer_normalized, optimizer='Adam')
80+
model.summary()
81+
82+
# training on multi-label dataset
83+
x_train, x_test, y_train, y_test = generate_dataset(n_smp=10000, n_feat=dim, n_cls=nclass)
84+
mask = y_train.sum(axis=-1) != nclass
85+
y_train = y_train[mask]
86+
x_train = x_train[mask]
87+
hist = model.fit([y_train, x_train], y_train, nb_epoch=10, batch_size=16)
88+
89+
# cut MFoM head
90+
cut_model = cut_mfom(model)
91+
y_pred = cut_model.predict(x_test)
92+
93+
# evaluate
94+
eer_val = MT.eer(y_true=y_test.flatten(), y_pred=y_pred.flatten())
7895
print('EER: %.4f' % eer_val)
7996

80-
# history plot, alpha and beta params
97+
# history plot, alpha and beta params of MFoM
8198
m = model.get_layer('smooth_error_counter')
8299
print('alpha: ', K.get_value(m.alpha))
83100
print('beta: ', K.get_value(m.beta))

tests/model/test_mfom_3d.py

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def int2vec(x, dim=output_dim):
4747
x = Permute((2, 1))(feat_input)
4848
for _f in [256, 64]:
4949
x = TimeDistributed(Dense(_f))(x)
50-
# x = Activation(activation='elu')(x)
50+
x = Activation(activation='elu')(x)
5151
x = Dropout(0.5)(x)
5252
x = TimeDistributed(Dense(nclass))(x)
5353
y_pred = Activation(activation='tanh', name='output')(x)
@@ -62,7 +62,7 @@ def int2vec(x, dim=output_dim):
6262

6363
# compile model
6464
model = Model(input=[y_true, feat_input], output=out)
65-
model.compile(loss=obj.mfom_eer_normalized, optimizer='Adadelta') # Adam, Adadelta
65+
model.compile(loss=obj.mfom_eer_normalized, optimizer='Adadelta')
6666
model.summary()
6767

6868
# train
@@ -87,22 +87,11 @@ def int2vec(x, dim=output_dim):
8787
print('l_EER: %.4f' % eer_val)
8888
print(model.evaluate([all_Y, all_X], all_Y))
8989

90-
# TODO notice from the experiments:
91-
# when we minimize obj.mfom_microf1 with psi = y_pred or psi = -y_pred + 0.5 in
92-
# UvZMisclassification() layer, the smoothF1 is minimized !!! but EER is not at all.
93-
# When we minimize obj.mfom_microf1 with psi = -y_pred + y_neg * unit_avg + y_true * zeros_avg,
94-
# then both smoothF1 and EER are minimized :)
95-
9690
# history plot, alpha and beta params
9791
m = model.get_layer('smooth_error_counter')
9892
print('alpha: ', K.get_value(m.alpha))
9993
print('beta: ', K.get_value(m.beta))
10094

10195
# print stats of psi misclassification measure
102-
# m = model.get_layer('uvz_misclass')
103-
# print('stats_d: ', K.get_value(m.stats_psi))
10496
plt.plot(hist.history['loss'])
105-
plt.show()
106-
107-
# TODO test MFoM with honest uvz-misclassification,
108-
# TODO test for 2 classes and check Triplet loss symptoms
97+
plt.show()

tests/model/test_sed_mfom.py

Lines changed: 0 additions & 105 deletions
This file was deleted.

0 commit comments

Comments
 (0)