Skip to content

Commit 4a153f3

Browse files
authored
data-manage: MergeRunTimeStatsToTable & calculate table statistics (#42)
* data-manage: MergeRunTimeStatsToTable & calculate table statistics Signed-off-by: Xiaolong Fu <[email protected]> * fmt fix Signed-off-by: Xiaolong Fu <[email protected]> --------- Signed-off-by: Xiaolong Fu <[email protected]>
1 parent eb6c061 commit 4a153f3

File tree

19 files changed

+574
-47
lines changed

19 files changed

+574
-47
lines changed

collector/src/compile_time/binary_size/plot/plotter_cmp.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,10 @@
2222
plt.legend(loc='lower right', prop={'size': 4})
2323

2424
plt.tight_layout()
25-
plt.savefig(out_file)
25+
plt.savefig(out_file)
26+
27+
name_value =[item for item in zip(names, values)]
28+
name_value.sort(key=lambda x: x[1])
29+
30+
for (name, value) in name_value:
31+
print(name, ":", round(value, 2))

collector/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
pub mod benchmark;
22
mod compile_time;
33
mod csv_transfer;
4-
mod execute;
4+
pub mod execute;
55
pub mod mir_analyze;
66
mod morpheme_miner;
77
mod pca_analysis;

collector/src/statistics/compile_time_stat.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ use crate::{
1010

1111
#[derive(Serialize, Deserialize, Clone, Debug)]
1212
pub struct CompileTimeResult {
13-
benchmark: String,
14-
iteration: usize,
15-
profile: Profile,
16-
scenario: Scenario,
17-
stats: Stats,
13+
pub benchmark: String,
14+
pub iteration: usize,
15+
pub profile: Profile,
16+
pub scenario: Scenario,
17+
pub stats: Stats,
1818
}
1919

2020
impl CompileTimeResult {
@@ -35,11 +35,11 @@ impl CompileTimeResult {
3535
}
3636
}
3737

38-
#[derive(Serialize, Deserialize, Debug)]
38+
#[derive(Serialize, Deserialize, Debug, Clone)]
3939
pub struct CompileTimeBenchResult {
40-
benchmark: String,
41-
iterations: usize,
42-
result_vec: Vec<CompileTimeResult>,
40+
pub benchmark: String,
41+
pub iterations: usize,
42+
pub result_vec: Vec<CompileTimeResult>,
4343
}
4444

4545
impl CompileTimeBenchResult {
@@ -90,10 +90,10 @@ impl CompileTimeBenchResult {
9090
}
9191
}
9292

93-
#[derive(Serialize, Deserialize)]
93+
#[derive(Serialize, Deserialize, Clone)]
9494
pub struct CompileTimeResultSet {
95-
id: String,
96-
results: Vec<CompileTimeBenchResult>,
95+
pub id: String,
96+
pub results: Vec<CompileTimeBenchResult>,
9797
}
9898

9999
impl CompileTimeResultSet {
@@ -141,7 +141,7 @@ impl CompileTimeResultSet {
141141
}
142142
}
143143

144-
#[derive(Serialize, Deserialize)]
144+
#[derive(Serialize, Deserialize, Clone)]
145145
pub struct CompileTimeStatistic {
146146
pub name: String,
147147
pub profile: Profile,

data_manage/src/commannds.rs

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ pub enum Commands {
2626
#[clap(long = "rust-ver")]
2727
rustc: String,
2828

29-
/// The path of output file
29+
/// The path of output directory
3030
#[clap(long = "out-path", default_value = "results")]
31-
out_path: PathBuf,
31+
out_dir: PathBuf,
3232
},
3333

3434
/// Merge runtime data of the same rustc version from different benchmark groups
@@ -42,6 +42,25 @@ pub enum Commands {
4242
#[clap(long = "rust-ver")]
4343
rustc: String,
4444

45+
/// The path of output dir
46+
#[clap(long = "out-path", default_value = "results")]
47+
out_dir: PathBuf,
48+
},
49+
50+
/// Compare two data sets on one metric and plot a boxplot of their change rates.
51+
DataCompare {
52+
/// The first data input file.
53+
#[clap(long = "data-1")]
54+
data_a: PathBuf,
55+
56+
/// The second data input file.
57+
#[clap(long = "data-2")]
58+
data_b: PathBuf,
59+
60+
/// The metric that needs comparison.
61+
#[clap(long = "metric")]
62+
metric: String,
63+
4564
/// The path of output file
4665
#[clap(long = "out-path", default_value = "results")]
4766
out_path: PathBuf,
@@ -105,6 +124,16 @@ pub enum Commands {
105124
merged_metric: String,
106125
},
107126

127+
/// Calculate statistics of a table-data fmt file.
128+
CalculateTableStats {
129+
/// The path of table data fmt file.
130+
#[clap(long = "table-data")]
131+
table_data_path: PathBuf,
132+
/// The path of output file.
133+
#[clap(long = "out-path")]
134+
out_path: PathBuf,
135+
},
136+
108137
/// Merge compile-time stats into a table data fmt file.
109138
MergeCompileTimeStatsToTable {
110139
/// The path of table data fmt file.
@@ -121,6 +150,21 @@ pub enum Commands {
121150
new_metrics: String,
122151
},
123152

153+
/// Merge runtime stats into a table data fmt file.
154+
MergeRuntimeStatsToTable {
155+
/// The path of table data fmt file.
156+
#[clap(long = "table-data")]
157+
table_data_path: PathBuf,
158+
/// The path of runtime stats fmt file.
159+
#[clap(long = "stats")]
160+
stats_path: PathBuf,
161+
/// The path of output file.
162+
#[clap(long = "out-path")]
163+
out_path: PathBuf,
164+
/// Metrics merged from stats fmt file. Use ',' to concatenate the metrics.
165+
#[clap(long = "new_metrics")]
166+
new_metrics: String,
167+
},
124168
/// Do pca analysis on a table fmt file.
125169
PcaAnalysis {
126170
/// The path of table data fmt file.
@@ -133,4 +177,15 @@ pub enum Commands {
133177
#[clap(long = "max-component-num")]
134178
max_component_num: u32,
135179
},
180+
181+
/// Normalize statistic by wall-time.
182+
NormalizeStat {
183+
/// The stat fmt file.
184+
#[clap(long = "stats")]
185+
stats: PathBuf,
186+
187+
/// The output path.
188+
#[clap(long = "out-path")]
189+
out_path: PathBuf,
190+
},
136191
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use std::{
2+
path::PathBuf,
3+
process::{Command, Stdio},
4+
};
5+
6+
use super::data::{calculate_change_rate, read_data, ChangeRate};
7+
8+
pub fn compare_data(
9+
data_a: &PathBuf,
10+
data_b: &PathBuf,
11+
metric: &String,
12+
out_path: PathBuf,
13+
) -> anyhow::Result<PathBuf> {
14+
let data_a = read_data(data_a, metric)?;
15+
let data_b = read_data(data_b, metric)?;
16+
17+
// Calculate change rate of stats_a on stats_b
18+
let change_rate = calculate_change_rate(&data_a, &data_b);
19+
20+
plot_boxplot_compare(&change_rate, out_path, metric)
21+
}
22+
23+
fn plot_boxplot_compare(
24+
data: &ChangeRate,
25+
out_path: PathBuf,
26+
metric: &String,
27+
) -> anyhow::Result<PathBuf> {
28+
let mut cmd = Command::new("python");
29+
cmd.arg("src/plotters/plotter_cmp_boxplot.py")
30+
.arg(
31+
data.into_iter()
32+
.map(|(k, v)| {
33+
format!(
34+
"{}:{}",
35+
k,
36+
v.into_iter()
37+
.map(|d| d.to_string())
38+
.collect::<Vec<String>>()
39+
.join(",")
40+
)
41+
})
42+
.collect::<Vec<String>>()
43+
.join(";"),
44+
)
45+
.arg(&out_path)
46+
.arg(metric);
47+
cmd.stdout(Stdio::inherit());
48+
cmd.spawn().unwrap().wait().unwrap();
49+
50+
Ok(out_path)
51+
}
52+
53+
#[cfg(test)]
mod test_compare_stat {
    use std::{
        fs::{self, remove_file},
        path::PathBuf,
    };

    use super::compare_data;

    /// Test for `compare_data`.
    ///
    /// Step1. compare the `wall-time` metric of the two runtime data files in
    /// `test/compare_data/stat`.
    ///
    /// Step2. check that the returned path is the requested one and that the
    /// plot file exists.
    ///
    /// Step3. clean up.
    #[test]
    fn test_compare_data() {
        let stat_1 = PathBuf::from("test/compare_data/stat/merged-runtime_data_current.json");
        let stat_2 = PathBuf::from("test/compare_data/stat/merged-runtime_data_old.json");
        let metric = String::from("wall-time");
        let out_path = PathBuf::from("test/compare_data/compare_data.jpeg");

        let produced = compare_data(&stat_1, &stat_2, &metric, out_path.clone()).unwrap();
        assert_eq!(out_path, produced);

        // The plot must exist on disk before it is removed again.
        fs::metadata(&out_path).unwrap();
        remove_file(out_path).unwrap();
    }
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
use std::{collections::HashMap, fs::File, io::BufReader, path::PathBuf};
2+
3+
use anyhow::bail;
4+
use collector::statistics::{
5+
compile_time_stat::CompileTimeResultSet, runtime_stat::RuntimeResultVec,
6+
};
7+
8+
pub type LabeledData = HashMap<String, Vec<f64>>;
9+
pub type ChangeRate = HashMap<String, Vec<f64>>;
10+
11+
pub fn read_data(data_file: &PathBuf, metric: &String) -> anyhow::Result<LabeledData> {
12+
// Filter out non-relevant metrics and reshape stats into Hashmap
13+
14+
match serde_json::from_reader::<_, CompileTimeResultSet>(BufReader::new(File::open(data_file)?))
15+
{
16+
Ok(s) => return Ok(reshape_data(s, metric)),
17+
Err(_) => (),
18+
}
19+
20+
match serde_json::from_reader::<_, RuntimeResultVec>(BufReader::new(File::open(data_file)?)) {
21+
Ok(s) => return Ok(reshape_runtime_data(s, metric)),
22+
Err(e) => bail!(e),
23+
}
24+
}
25+
26+
/// Computes, per label, the percentage change of `data_a` relative to `data_b`.
///
/// For every label in `data_a`, both sample vectors are sorted in ascending
/// order and paired by rank; each pair `(a, b)` yields `(a - b) / b * 100`.
/// Sorting uses the IEEE total order, so NaN samples do not cause a panic
/// (they propagate into the result as NaN).
///
/// # Panics
///
/// Panics when a label of `data_a` is missing from `data_b`, or when the two
/// sample vectors of a label differ in length.
pub fn calculate_change_rate(data_a: &LabeledData, data_b: &LabeledData) -> ChangeRate {
    data_a
        .iter()
        .map(|(label, samples_a)| {
            let samples_b = data_b.get(label).unwrap_or_else(|| {
                panic!("label `{}` is missing from the second data set", label)
            });
            assert_eq!(
                samples_a.len(),
                samples_b.len(),
                "label `{}` has a different number of samples in the two data sets",
                label
            );

            let mut sorted_a = samples_a.clone();
            let mut sorted_b = samples_b.clone();
            // `total_cmp` never fails, unlike `partial_cmp(..).unwrap()` on NaN.
            sorted_a.sort_unstable_by(f64::total_cmp);
            sorted_b.sort_unstable_by(f64::total_cmp);

            let rates = sorted_a
                .iter()
                .zip(&sorted_b)
                .map(|(a, b)| (a - b) / b * 100.)
                .collect();

            (label.clone(), rates)
        })
        .collect()
}
49+
50+
fn reshape_data(data: CompileTimeResultSet, metric: &String) -> LabeledData {
51+
data.results
52+
.into_iter()
53+
.map(|s| {
54+
(
55+
s.benchmark,
56+
s.result_vec
57+
.into_iter()
58+
.map(|d| {
59+
d.stats
60+
.stats
61+
.into_iter()
62+
.find(|(m, _)| m == metric)
63+
.unwrap()
64+
})
65+
.map(|x| x.1)
66+
.collect(),
67+
)
68+
})
69+
.collect()
70+
}
71+
72+
fn reshape_runtime_data(data: RuntimeResultVec, metric: &String) -> LabeledData {
73+
data.0
74+
.into_iter()
75+
.map(|s| {
76+
(
77+
s.name,
78+
s.stats
79+
.into_iter()
80+
.map(|stats| stats.stats.into_iter().find(|(m, _)| m == metric).unwrap())
81+
.map(|x| x.1)
82+
.collect(),
83+
)
84+
})
85+
.collect()
86+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
pub mod compare_data;
12
pub mod compare_stat;
23
pub mod compare_stat_2d;
4+
mod data;
35
mod stats;

0 commit comments

Comments
 (0)