-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmake_plot_uncertainties.py
155 lines (150 loc) · 4.48 KB
/
make_plot_uncertainties.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from gpflow.kernels import Matern52, SquaredExponential
from algorithms.gp_on_real_space import GPonRealSpace
from algorithms.KNN import KNN
from algorithms.uncertain_rf import UncertainRandomForest
from data.train_test_split import BioSplitter, PositionSplitter, RandomSplitter
from util.mlflow.constants import (
ESM,
ESM1V,
ESM2,
EVE,
LINEAR,
MSE,
NO_AUGMENT,
ONE_HOT,
PROTT5,
TRANSFORMER,
)
from visualization.plot_metric_for_uncertainties import plot_uncertainty_eval
# ---------------------------------------------------------------------------
# Plot configuration shared by the calls in the ``__main__`` section.
# ---------------------------------------------------------------------------

# Data selection for the main figures.
datasets = ["1FQG"]  # 1FQG, UBQT, CALM
reps = [PROTT5, ESM, EVE, ONE_HOT]
augmentations = [NO_AUGMENT]

# Evaluation settings.
metric = MSE
number_quantiles = 10
cached_results = True

# Dimensionality-reduction settings.
d = None  # 2, 10, 100, 1000, None
dim_reduction = LINEAR

# Names of the regression methods to plot. Each builder is called and then
# immediately asked for its name, one method at a time, in list order.
algos = [
    build().get_name()
    for build in (
        UncertainRandomForest,
        GPonRealSpace,
        KNN,
        lambda: GPonRealSpace(kernel_factory=lambda: Matern52()),
        lambda: GPonRealSpace(kernel_factory=lambda: SquaredExponential()),
    )
]
if __name__ == "__main__":
    # Keyword arguments that are identical for every plot_uncertainty_eval
    # call in this script. dim_reduction is passed as None explicitly; the
    # module-level ``dim_reduction`` setting is not forwarded by this script.
    shared_kwargs = dict(
        algos=algos,
        augmentations=augmentations,
        number_quantiles=number_quantiles,
        optimize=True,
        d=d,
        dim_reduction=None,
        cached_results=cached_results,
    )

    ### MAKE UNCERTAIN PLOTS RANDOMSPLITTER
    plot_uncertainty_eval(
        datasets=datasets,
        reps=reps,
        train_test_splitter=RandomSplitter(datasets[0]),
        **shared_kwargs,
    )
    ### MAKE UNCERTAIN PLOTS POSITIONSPLITTER
    plot_uncertainty_eval(
        datasets=datasets,
        reps=reps,
        train_test_splitter=PositionSplitter(datasets[0]),
        **shared_kwargs,
    )

    #########
    #### SUPPLEMENTARY:
    supplementary_datasets = ["1FQG", "UBQT", "TIMB", "MTH3", "BRCA"]
    supplementary_reps = [TRANSFORMER, PROTT5, ESM, ESM1V, ESM2, EVE, ONE_HOT]

    # One figure per dataset. The splitter is built from the dataset being
    # plotted (previously it was always built from datasets[0], regardless of
    # which dataset the loop was processing).
    for dataset in supplementary_datasets:
        plot_uncertainty_eval(
            datasets=[dataset],
            reps=supplementary_reps,
            train_test_splitter=RandomSplitter(dataset),
            **shared_kwargs,
        )
    # MAKE UNCERTAIN PLOTS POSITIONSPLITTER
    for dataset in supplementary_datasets:
        plot_uncertainty_eval(
            datasets=[dataset],
            reps=supplementary_reps,
            train_test_splitter=PositionSplitter(dataset),
            **shared_kwargs,
        )

    ### SI: UNCERTAINTIES TOXI
    # Positional argument pairs handed to BioSplitter after the dataset name,
    # in the order the figures are produced.
    toxi_split_args = [(1, 2), (2, 2), (2, 3), (3, 3), (3, 4)]
    for first_arg, second_arg in toxi_split_args:
        plot_uncertainty_eval(
            datasets=["TOXI"],
            reps=reps,
            train_test_splitter=BioSplitter("TOXI", first_arg, second_arg),
            **shared_kwargs,
        )