Skip to content

Commit 2601771

Browse files
committed
add plotting utils
1 parent 8530d7f commit 2601771

1 file changed

Lines changed: 76 additions & 0 deletions

File tree

plotting/plotting_utils.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import pickle
2+
import numpy as np
3+
4+
def values_to_cdf(values):
    """Sort *values* in place and return the matching empirical CDF levels.

    The i-th entry (1-based) of the returned list is ``i / len(values)``,
    i.e. the cumulative fraction of samples at or below the i-th smallest
    value.  Because ``values`` is sorted in place, the caller can pair it
    element-wise with the returned list afterwards.

    Args:
        values: A mutable sequence supporting ``.sort()`` and ``len()``.

    Returns:
        A list of ``len(values)`` floats rising from ``1 / n`` to ``1.0``;
        an empty list when ``values`` is empty.
    """
    # In-place on purpose: the sorted order is visible to the caller.
    values.sort()
    total = len(values)
    return [rank / total for rank in range(1, total + 1)]
12+
13+
14+
def parse_result_file(
    model: str,
    dataset: str,
    slo_multiplier: int,
    arrival: str,
    BATCH_DECISION_PATH: str,
    APPARATE_LATENCY_PATH: str,
    OPTIMAL_LATENCY_PATH: str,
):
    """Load one experiment's pickled results and derive serving-time stats.

    Three pickle files are read: the batch-decision log (a mapping with a
    ``"per_request_stats"`` entry whose items are
    ``(queuing delay, inference time)`` pairs or ``None``), the apparate
    per-request latencies, and the optimal per-request latencies.  The two
    latency sequences are truncated to their common length, and a serving
    time is computed per request as queuing delay + inference time for the
    vanilla, apparate ("ee"), and optimal variants.

    Args:
        model: Model name substituted into the path templates.
        dataset: Dataset name substituted into the latency path templates.
        slo_multiplier: SLO multiplier substituted into the path templates;
            only used when ``BATCH_DECISION_PATH`` contains the text
            ``"slo_multiplier"``.
        arrival: Arrival-trace name substituted into the path templates.
        BATCH_DECISION_PATH: ``str.format`` template for the batch-decision
            pickle.
        APPARATE_LATENCY_PATH: ``str.format`` template for the apparate
            latency pickle.
        OPTIMAL_LATENCY_PATH: ``str.format`` template for the optimal
            latency pickle.

    Returns:
        A dict with keys ``"apparate_serving_improvement"`` and
        ``"optimal_serving_improvement"`` (percentage reduction of the
        median serving time relative to vanilla) and
        ``"serving_time_vanilla"``, ``"serving_time_ee"``,
        ``"serving_time_optimal"`` (lists of per-request serving times).

    Raises:
        OSError: If any of the three result files cannot be opened.
        KeyError: If a template references a field that is not supplied, or
            the batch-decision pickle lacks ``"per_request_stats"``.
    """
    print(f"model {model}, dataset {dataset}")

    if "slo_multiplier" in BATCH_DECISION_PATH:  # CV workload, with SLO multiplier
        batch_decision_path = BATCH_DECISION_PATH.format(
            model=model, slo_multiplier=slo_multiplier, arrival=arrival
        )
        apparate_latency_path = APPARATE_LATENCY_PATH.format(
            model=model, dataset=dataset, slo_multiplier=slo_multiplier, arrival=arrival
        )
        # NOTE(review): the optimal results are always read with
        # slo_multiplier=4 regardless of the argument -- presumably only that
        # file exists; confirm this is intentional.
        optimal_latency_path = OPTIMAL_LATENCY_PATH.format(
            model=model, dataset=dataset, slo_multiplier=4, arrival=arrival
        )
    else:  # NLP workload, azure arrival trace
        batch_decision_path = BATCH_DECISION_PATH.format(model=model, arrival=arrival)
        apparate_latency_path = APPARATE_LATENCY_PATH.format(
            model=model, dataset=dataset, arrival=arrival
        )
        optimal_latency_path = OPTIMAL_LATENCY_PATH.format(
            model=model, dataset=dataset, arrival=arrival
        )

    # NOTE: pickle.load executes arbitrary code embedded in the file; only
    # point this at result files produced by trusted runs.
    with open(batch_decision_path, "rb") as f1, \
            open(apparate_latency_path, "rb") as f2, \
            open(optimal_latency_path, "rb") as f3:
        batch_decision = pickle.load(f1)
        apparate_latency = pickle.load(f2)
        optimal_latency = pickle.load(f3)

    # Every item: (queuing delay, inference time); None entries are skipped.
    per_request_stats = [
        stat for stat in batch_decision["per_request_stats"] if stat is not None
    ]

    # Compare apparate and optimal over the same number of requests.
    length = min(len(apparate_latency), len(optimal_latency))
    apparate_latency = apparate_latency[:length]
    optimal_latency = optimal_latency[:length]
    # NOTE(ruipan): might be smaller than the number of non-None request
    # stats b/c some are dropped
    num_served_requests = len(apparate_latency)
    print(f"num_served_requests {num_served_requests}")

    served_stats = per_request_stats[:num_served_requests]
    queuing_delays = [stat[0] for stat in served_stats]
    model_inference_time_vanilla = [stat[1] for stat in served_stats]
    model_inference_time_ee = apparate_latency
    model_inference_time_optimal = optimal_latency

    # Serving time = queuing delay + model inference time, per request.
    serving_time_vanilla = [
        q + t for q, t in zip(queuing_delays, model_inference_time_vanilla)
    ]
    serving_time_ee = [
        q + t for q, t in zip(queuing_delays, model_inference_time_ee)
    ]
    serving_time_optimal = [
        q + t for q, t in zip(queuing_delays, model_inference_time_optimal)
    ]

    # Percentage reduction of the median serving time relative to vanilla.
    median_vanilla = np.median(serving_time_vanilla)
    apparate_serving_improvement = 100 * (1 - np.median(serving_time_ee) / median_vanilla)
    optimal_serving_improvement = 100 * (1 - np.median(serving_time_optimal) / median_vanilla)

    return {
        "apparate_serving_improvement": apparate_serving_improvement,
        "optimal_serving_improvement": optimal_serving_improvement,
        "serving_time_vanilla": serving_time_vanilla,
        "serving_time_ee": serving_time_ee,
        "serving_time_optimal": serving_time_optimal,
    }

0 commit comments

Comments
 (0)