-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathmodel_tester.py
143 lines (119 loc) · 8.24 KB
/
model_tester.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# deprecated
# WARN: is this test still needed?
import os
from prom_test import get_query_results
from sklearn.metrics import mean_absolute_error, mean_squared_error
from kepler_model.estimate.model.model import load_model
from kepler_model.train import DefaultExtractor
from kepler_model.train.isolator.train_isolator import get_background_containers, isolate_container
from kepler_model.train.profile import get_min_max_watt, profile_process
from kepler_model.util.loader import default_node_type
from kepler_model.util.prom_types import TIMESTAMP_COL, prom_responses_to_results
from kepler_model.util.train_types import ModelOutputType, PowerSourceMap
# model_tester.py
# to get the test result across different train/test data set
# Module-level extractor shared by process(); extractor.extract() turns query
# results into a dataframe of feature columns plus labeled power columns.
extractor = DefaultExtractor()
def list_subfolder(top_path):
    """Return the names of the immediate subdirectories of *top_path*.

    BUG FIX: the previous predicate was ``not os.path.isdir(...)``, which
    returned *files* rather than subfolders — contradicting the function
    name and breaking the directory walk in process(), where every listed
    entry (energy_source, feature_group, model folder) is joined back into
    a path and traversed as a directory.
    """
    return [f for f in os.listdir(top_path) if os.path.isdir(os.path.join(top_path, f))]
def compute_error(predicted_power, actual_powers):
    """Return the pair (MAE, MSE) of *predicted_power* against *actual_powers*."""
    absolute_err = mean_absolute_error(actual_powers, predicted_power)
    squared_err = mean_squared_error(actual_powers, predicted_power)
    return absolute_err, squared_err
def process(train_dataset_name, test_dataset_name, target_path):
    """Evaluate every saved model under *target_path* against a test dataset.

    Walks target_path/<pipeline>/<energy_source>/<output_type>/<feature_group>/
    <model>, loads each model, predicts power on the extracted test data, and
    computes MAE/MSE (plus background-reconstruction variants for DynPower
    models) against the labeled power columns.

    Args:
        train_dataset_name: dataset the models were trained on; used to
            resolve the pipeline folder name and to tag result rows.
        test_dataset_name: dataset to evaluate against.
        target_path: root folder containing one subfolder per pipeline.

    Returns:
        list of dict rows, one per (model, energy_component) evaluation.
        (Previously the rows were built and silently dropped — BUG FIX.)
    """
    idle_data = get_query_results(save_name="idle")
    background_containers = get_background_containers(idle_data)
    profiles = profile_process(idle_data)
    # BUG FIX: prom_responses_to_results takes the raw prom responses; it was
    # previously called with no argument (TypeError at runtime).
    # NOTE(review): assumes get_query_results(save_name=...) returns the raw
    # responses for the named dataset — confirm against prom_test.
    test_data = prom_responses_to_results(get_query_results(save_name=test_dataset_name))
    node_types, _ = extractor.get_node_types(idle_data)
    if node_types is None:
        node_type = default_node_type  # default node type
    else:
        node_type = int(node_types[0])  # limit only one node type in single data set
    # find best_abs model (used to estimate background power for DynPower models)
    # NOTE(review): find_best_abs_model, isolators, get_pipeline_name and the
    # get_predicted_*_colname helpers are not defined or imported in this
    # file — presumably missing imports; verify before running.
    best_abs_model = find_best_abs_model()
    items = []  # collected result rows
    for isolator, _ in isolators.items():
        print("Isolator: ", isolator)
        pipeline_name = get_pipeline_name(train_dataset_name, isolator)
        save_path = os.path.join(target_path, pipeline_name)
        for energy_source in list_subfolder(save_path):
            source_path = os.path.join(save_path, energy_source)
            # perform AbsPower first to get the best abs model
            for output_type in [ModelOutputType.AbsPower, ModelOutputType.DynPower]:
                model_output_path = os.path.join(source_path, output_type.name)
                for feature_group in list_subfolder(model_output_path):
                    # BUG FIX: feature_group folders live under model_output_path,
                    # not directly under source_path.
                    feature_path = os.path.join(model_output_path, feature_group)
                    model_paths = [os.path.join(feature_path, f) for f in list_subfolder(feature_path)]
                    for model_path in model_paths:
                        model = load_model(model_path)
                        energy_components = PowerSourceMap[energy_source]
                        extracted_data, power_columns, _, _ = extractor.extract(test_data, energy_components, feature_group, energy_source, node_level=False)
                        feature_columns = [col for col in extracted_data.columns if col not in power_columns]
                        if not model.feature_check(feature_columns):
                            print(f"model {model.name} ({energy_source}/{output_type.name}/{feature_group})is not valid to test")
                            continue
                        if output_type == ModelOutputType.AbsPower:
                            data_with_prediction = extracted_data.copy()
                            predicted_data = model.get_power(extracted_data)
                        else:
                            # DynPower: predict on isolated target containers and
                            # reconstruct total power with background estimates.
                            target_data, background_data = isolate_container(extracted_data, background_containers)
                            data_with_prediction = target_data.copy()
                            predicted_data = model.get_power(target_data)
                            abs_model = best_abs_model[energy_source]
                            predicted_background_power = abs_model.get_power(background_data)
                            predicted_background_dynamic_power = model.get_power(background_data)
                        # for each energy_component
                        for energy_component, values in predicted_data.items():
                            item = {
                                "train_dataset": train_dataset_name,
                                "test_dataset": test_dataset_name,
                                "isolator": isolator,
                                "energy_source": energy_source,
                                "feature_group": feature_group,
                                "model": model.name,
                                "model_path": model_path,
                                "energy_component": energy_component,
                            }
                            label_power_columns = [col for col in power_columns if energy_component in col]
                            # sum label value for all unit
                            # mean to squeeze value of power back
                            # NOTE(review): the label columns are grouped out of
                            # predicted_data here — looks like it may have been
                            # intended to come from data_with_prediction; confirm.
                            sum_power_label = predicted_data.groupby([TIMESTAMP_COL])[label_power_columns].mean().sum(axis=1).sort_index()
                            # append predicted value to data_with_prediction
                            # TO-DO: use predict_and_sort in train_isolator.py
                            predicted_power_colname = get_predicted_power_colname(energy_component)
                            data_with_prediction[predicted_power_colname] = values
                            sum_predicted_power = data_with_prediction.groupby([TIMESTAMP_COL]).sum().sort_index()[predicted_power_colname]
                            if output_type == ModelOutputType.AbsPower:
                                item["mae"], item["mse"] = compute_error(sum_power_label, sum_predicted_power)
                            else:
                                # profile-based: reconstruct total power by adding
                                # the midpoint of the profiled idle watt range
                                min_watt, max_watt = get_min_max_watt(profiles, energy_component, node_type)
                                profile_watt = (min_watt + max_watt) / 2
                                profile_reconstructed_power = sum_predicted_power + profile_watt
                                item["profile_mae"], item["profile_mse"] = compute_error(sum_power_label, profile_reconstructed_power)
                                item["profile_watt"] = profile_watt
                                # calculate background power cols (used by both abs-predicted and min)
                                predicted_background_power_values = predicted_background_power[energy_component]
                                background_power_colname = get_predicted_background_power_colname(energy_component)
                                background_data[background_power_colname] = predicted_background_power_values
                                predicted_background_dynamic_power_values = predicted_background_dynamic_power[energy_component]
                                dynamic_background_power_colname = get_predicted_dynamic_background_power_colname(energy_component)
                                background_data[dynamic_background_power_colname] = predicted_background_dynamic_power_values
                                sorted_background_data = background_data.groupby([TIMESTAMP_COL]).sum().sort_index()
                                # abs-predicted based
                                sum_background_power = sorted_background_data[background_power_colname]
                                trained_reconstructed_power = sum_background_power + sum_predicted_power
                                item["train_bg_mae"], item["train_bg_mse"] = compute_error(sum_power_label, trained_reconstructed_power)
                                item["avg_train_bg"] = sum_background_power.mean()
                                item["bg_abs_model"] = abs_model.name
                                # min based
                                sum_dynamic_background_power = sorted_background_data[dynamic_background_power_colname]
                                min_reconstructed_power = sum_dynamic_background_power + min_watt + sum_predicted_power
                                item["min_bg_mae"], item["min_bg_mse"] = compute_error(sum_power_label, min_reconstructed_power)
                                item["min"] = min_watt
                            # BUG FIX: the result row was built but never stored
                            items.append(item)
    return items
if __name__ == "__main__":
    # Smoke-test entry point: evaluate models trained and tested on the same
    # sample dataset.
    dataset_name = "sample_data"
    # NOTE(review): offline_trainer_output_path is not defined or imported in
    # this file — presumably a missing import (e.g. a config/loader constant);
    # confirm before running.
    target_path = offline_trainer_output_path
    # same train/test dataset
    process(dataset_name, dataset_name, target_path)