Skip to content

Commit

Permalink
Merge pull request #21 from 191220029/record-compiled-binary-size
Browse files Browse the repository at this point in the history
Impl plotter for compiled_binary_size
  • Loading branch information
genedna authored Apr 13, 2024
2 parents 03c626c + 7fb20d9 commit 2cfea37
Show file tree
Hide file tree
Showing 14 changed files with 1,950 additions and 13 deletions.
1 change: 1 addition & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ jobs:
- name: Install dependencies
run: |
pip install -U kaleido
pip install -U matplotlib
- name: Build and test
run: |
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ https://blog.eastonman.com/blog/2021/02/use-perf/

3. Related dependencies
python3
kaleido (can be installed with pip...
kaleido, matplotlib (can be installed with pip...
```
pip install -U kaleido
pip install -U kaleido matplotlib
```
or with conda...)
```
conda install -c conda-forge python-kaleido
conda install -c conda-forge python-kaleido matplotlib
```

**Attention:** The harness & flamegraph will fail to collect some of the specific data we need if you are going to run it in a virtual environment.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{

use crate::{benchmark::profile::Profile, execute::Stats, toolchain::Compiler};

use super::BinaryProcess;
use super::{BinaryProcess, BINARY_SIZE_LABEL};

pub struct BinaryPackageProcess<'a> {
pub compiler: Compiler<'a>,
Expand Down Expand Up @@ -81,7 +81,7 @@ impl<'a> BinaryProcess for BinaryPackageProcess<'a> {
} else {
let mut stats = Stats::new();
stats.stats.insert(
"binary_size".to_string(),
BINARY_SIZE_LABEL.to_string(),
binary_size as f64 / (1 << 20) as f64,
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{

use crate::{benchmark::profile::Profile, execute::Stats, toolchain::Compiler};

use super::BinaryProcess;
use super::{BinaryProcess, BINARY_SIZE_LABEL};

pub struct BinarySingleProcess<'a> {
pub compiler: Compiler<'a>,
Expand Down Expand Up @@ -76,7 +76,7 @@ impl<'a> BinaryProcess for BinarySingleProcess<'a> {
} else {
let mut stats = Stats::new();
stats.stats.insert(
"binary_size".to_string(),
BINARY_SIZE_LABEL.to_string(),
binary_size as f64 / (1 << 20) as f64,
);

Expand Down
62 changes: 60 additions & 2 deletions collector/src/compile_time/binary_size/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{
ffi::OsString,
fs::read_dir,
mem::ManuallyDrop,
path::{Path, PathBuf},
};
Expand All @@ -24,6 +25,9 @@ use super::result::CompileTimeBenchResult;

pub mod binary_package_process;
pub mod binary_single_process;
pub mod plotter;

const BINARY_SIZE_LABEL: &str = "binary_size";

pub trait BinaryProcess {
fn run_rustc(&self) -> anyhow::Result<Option<Stats>>;
Expand All @@ -38,13 +42,33 @@ pub trait BinaryProcess {
"incremental",
".fingerprint",
".cargo-lock",
"CACHEDIR.TAG",
".rustc_info.json",
];
if let Some(file_name) = file_name.to_str() {
filted_names.contains(&file_name) | file_name.ends_with(".d")
} else {
true
}
}

/// Calculate the binary size of compiled target.
///
/// Recursively walks `target_dir` and sums the length in bytes of every
/// regular file found. Entries whose file name is rejected by
/// `is_filtered_file_name` are skipped entirely (a filtered directory is
/// not descended into). Entries that are neither a file nor a directory
/// contribute nothing to the total.
///
/// # Errors
///
/// Returns an error if a directory cannot be read or if the metadata of an
/// entry cannot be queried.
fn get_binary_size(&self, target_dir: PathBuf) -> anyhow::Result<u64> {
    let mut binary_size = 0;
    for entry in read_dir(target_dir)? {
        let entry = entry?;
        if self.is_filtered_file_name(entry.file_name()) {
            continue;
        }

        // Query the metadata once and reuse it for both the type check and
        // the length, instead of issuing a second lookup for the same entry.
        let md = entry.metadata()?;
        if md.is_file() {
            binary_size += md.len();
        } else if md.is_dir() {
            binary_size += self.get_binary_size(entry.path())?;
        }
    }
    Ok(binary_size)
}
}

pub fn bench_binary_size(
Expand Down Expand Up @@ -196,11 +220,18 @@ impl Benchamrk {

#[cfg(test)]
mod test_binary_size {
use std::{path::PathBuf, process::Command};
use std::{
path::{Path, PathBuf},
process::Command,
};

use anyhow::Context;

use crate::{benchmark::profile::Profile, toolchain::LocalToolchain};
use crate::{
benchmark::profile::Profile,
compile_time::binary_size::{binary_single_process::BinarySingleProcess, BinaryProcess},
toolchain::{Compiler, LocalToolchain},
};

use super::bench_binary_size;

Expand Down Expand Up @@ -255,4 +286,31 @@ mod test_binary_size {
.for_each(|s| s.stats.values().for_each(|v| assert!(*v > 0.)));
})
}

/// Exercises `BinaryProcess::get_binary_size` on the current directory.
///
/// 1. Build a `BinarySingleProcess` from placeholder values; the paths are
///    never executed by this test.
///
/// 2. Measure `"."` and check that the total exceeds 15 MB.
#[test]
fn test_get_binary_size() {
    let placeholder = Path::new("null");
    let compiler = Compiler {
        rustc: placeholder,
        cargo: placeholder,
        is_nightly: false,
    };
    let process = BinarySingleProcess {
        compiler,
        processor_name: "test".to_string(),
        cwd: placeholder,
        profile: Profile::Check,
        incremental: false,
        manifest_path: String::new(),
        cargo_args: vec![],
        rustc_args: vec![],
        touch_file: None,
        target_path: None,
    };

    let total_bytes = process.get_binary_size(PathBuf::from(".")).unwrap();
    // Convert bytes to MB (1 << 20 bytes) before comparing with the threshold.
    let total_mb = total_bytes as f64 / (1 << 20) as f64;
    assert!(total_mb > 15.);
}
}
67 changes: 67 additions & 0 deletions collector/src/compile_time/binary_size/plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import matplotlib.pyplot as plt
import numpy as np
import sys

def annotate_interval(y_axis: list[float]) -> list[list[float]]:
    """Compute a text offset for the annotation of every point in `y_axis`.

    Points are assigned alternately to a left column (even index, x offset
    -80) and a right column (odd index, x offset 20). Within a column, each
    point is compared with the previous point of the same column (index
    `i - 2`): when the two values are closer than `tolerance`, the vertical
    offset grows by the missing distance so the labels do not overlap;
    otherwise the accumulated offset shrinks by the surplus distance, never
    dropping below zero.

    The caller passes the values sorted in ascending order.

    Returns one `[x_offset, y_offset]` pair per input value, in input order.
    """
    tolerance = 7.0
    # Accumulated vertical offsets, tracked separately for each column.
    cur_offset_left = 0
    cur_offset_right = 0
    intervals = []
    for (i, y) in enumerate(y_axis):
        is_left = i % 2 == 0
        cur_offset = cur_offset_left if is_left else cur_offset_right

        # The first point of each column has no predecessor to collide with.
        if i - 2 >= 0:
            interval = y - y_axis[i-2]
            if interval < tolerance:
                cur_offset += tolerance - interval
            else:
                cur_offset = max(0, cur_offset - (interval - tolerance))

        if is_left:
            cur_offset_left = cur_offset
            intervals.append([-80, cur_offset])
        else:
            cur_offset_right = cur_offset
            intervals.append([20, cur_offset])
    return intervals

if __name__ == '__main__':
    # Usage: plotter.py <data_1> <data_2> <label_1> <label_2> <out_file>
    #
    # <data_N> is a ';'-separated list of 'name,size' pairs; <label_N> is the
    # tick label of the corresponding box; <out_file> is where the figure is
    # saved.
    args = sys.argv
    # Five positional arguments are read below (args[1] .. args[5]), so the
    # argument vector must hold at least six entries. An explicit check is
    # used instead of `assert` so that it also runs under `python -O`.
    if len(args) < 6:
        sys.exit('usage: plotter.py <data_1> <data_2> <label_1> <label_2> <out_file>')

    raw_data_1 = args[1]
    raw_data_2 = args[2]
    label_1 = args[3]
    label_2 = args[4]
    out_file = args[5]

    # Parse 'name,size' pairs and order them by size (ascending), which is
    # the order annotate_interval is fed below.
    data_pair_1 = [[item.split(',')[0], float(item.split(',')[1])] for item in raw_data_1.split(';')]
    data_pair_2 = [[item.split(',')[0], float(item.split(',')[1])] for item in raw_data_2.split(';')]
    data_pair_1.sort(key=lambda d: d[1])
    data_pair_2.sort(key=lambda d: d[1])

    data_1 = [item[1] for item in data_pair_1]
    data_2 = [item[1] for item in data_pair_2]
    interval_1 = annotate_interval(data_1)
    interval_2 = annotate_interval(data_2)
    annotate_1 = [item[0] for item in data_pair_1]
    annotate_2 = [item[0] for item in data_pair_2]

    plt.figure(dpi=500)
    plt.boxplot([data_1, data_2], labels=[label_1, label_2])

    # Overlay the individual data points on top of each box.
    plt.scatter([1]*len(data_1), data_1, color='green', marker='o', s=2)
    plt.scatter([2]*len(data_2), data_2, color='green', marker='o', s=2)

    # Label every point with its name, shifted by the precomputed offsets.
    for i, d1 in enumerate(data_1):
        plt.annotate(annotate_1[i], (1, d1), textcoords="offset points", xytext=(interval_1[i][0], interval_1[i][1]), arrowprops=dict(headlength = 0.1, width = 0.15, headwidth = 0.1, shrink=0.99, linewidth=0.2, mutation_scale=0.1), fontsize=9)
    for i, d2 in enumerate(data_2):
        plt.annotate(annotate_2[i], (2, d2), textcoords="offset points", xytext=(interval_2[i][0], interval_2[i][1]), arrowprops=dict(headlength = 0.1, width = 0.15, headwidth = 0.1, shrink=0.99, linewidth=0.2, mutation_scale=0.1), fontsize=9)

    plt.ylabel('Binary Size (MB)')

    plt.savefig(out_file)
92 changes: 92 additions & 0 deletions collector/src/compile_time/binary_size/plotter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use std::{
fs::File,
io::BufReader,
path::PathBuf,
process::{Command, Stdio},
};

use crate::{
benchmark::profile::Profile,
compile_time::{binary_size::BINARY_SIZE_LABEL, result::CompileTimeBenchResult},
};

/// Draw a box plot comparing the binary sizes stored in two result files.
///
/// Both `data_file_a` and `data_file_b` are read as JSON arrays of
/// `CompileTimeBenchResult`. For every benchmark, the `BINARY_SIZE_LABEL`
/// statistic of the first stats entry for `profile` is extracted, and the
/// two series are handed to `plotter.py` as `name,size;name,size;...`
/// strings together with the labels and the output path.
///
/// # Errors
///
/// Returns an error if
/// * a data file cannot be opened or parsed,
/// * a benchmark has no `BINARY_SIZE_LABEL` statistic for `profile`,
/// * the `python` process cannot be started or waited on, or
/// * the plotter script exits with a non-zero status.
pub fn plot(
    data_file_a: PathBuf,
    data_file_b: PathBuf,
    label_a: String,
    label_b: String,
    out_path: PathBuf,
    profile: Profile,
) -> anyhow::Result<()> {
    let data_a: Vec<CompileTimeBenchResult> =
        serde_json::from_reader(BufReader::new(File::open(data_file_a)?))?;

    let data_b: Vec<CompileTimeBenchResult> =
        serde_json::from_reader(BufReader::new(File::open(data_file_b)?))?;

    // Serialise one data set as `benchmark,size` pairs joined by `;`.
    let get_benchmark_binary_size = |d: &[CompileTimeBenchResult]| -> anyhow::Result<String> {
        let pairs = d
            .iter()
            .map(|d| {
                let benchmark = d.get_benchmark();
                let stats = d.get_stats_ref_by_profile(&profile);
                let size = stats
                    .first()
                    .and_then(|s| s.stats.get(&BINARY_SIZE_LABEL.to_string()));
                // Report missing data as an error instead of panicking.
                match size {
                    Some(size) => Ok(format!("{},{}", benchmark, size)),
                    None => Err(anyhow::anyhow!(
                        "no `{}` statistic found for benchmark `{}`",
                        BINARY_SIZE_LABEL,
                        benchmark
                    )),
                }
            })
            .collect::<anyhow::Result<Vec<String>>>()?;
        Ok(pairs.join(";"))
    };

    let benchmark_binary_size_a = get_benchmark_binary_size(&data_a)?;
    let benchmark_binary_size_b = get_benchmark_binary_size(&data_b)?;

    let mut cmd = Command::new("python");
    cmd.arg("src/compile_time/binary_size/plotter.py")
        .arg(benchmark_binary_size_a)
        .arg(benchmark_binary_size_b)
        .arg(label_a)
        .arg(label_b)
        .arg(&out_path);
    cmd.stdout(Stdio::inherit());

    let status = cmd
        .spawn()
        .map_err(|e| anyhow::anyhow!("failed to start the python plotter: {}", e))?
        .wait()
        .map_err(|e| anyhow::anyhow!("failed to wait for the python plotter: {}", e))?;

    // A failed script would otherwise go unnoticed and leave no figure behind.
    if !status.success() {
        anyhow::bail!("the python plotter exited unsuccessfully: {}", status);
    }

    Ok(())
}

#[cfg(test)]
mod test_binary_size_plotter {
    use std::{
        fs::{self, remove_file},
        path::PathBuf,
    };

    use crate::benchmark::profile::Profile;

    use super::plot;

    /// Plots the two fixture data sets and checks that a figure is written.
    ///
    /// The generated figure is removed again once its existence is confirmed.
    #[test]
    fn test_plotter() {
        let input_a = PathBuf::from("test/binary_size/plotter/merged_binary_size.json");
        let input_b =
            PathBuf::from("test/binary_size/plotter/merged_rustc_perf_binary_size.json");
        let figure = PathBuf::from("test/binary_size/plotter/merged_binary_size.jpg");

        let outcome = plot(
            input_a,
            input_b,
            String::from("A"),
            String::from("B"),
            figure.clone(),
            Profile::Release,
        );
        outcome.unwrap();

        // Querying the metadata fails (and so does the test) if no figure exists.
        fs::metadata(&figure).unwrap();
        remove_file(figure).unwrap();
    }
}
2 changes: 1 addition & 1 deletion collector/src/compile_time/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl CompileTimeBenchResult {
.collect()
}

fn get_stats_with_profile_scenario(&self) -> HashMap<(Profile, Scenario), Vec<Stats>> {
pub fn get_stats_with_profile_scenario(&self) -> HashMap<(Profile, Scenario), Vec<Stats>> {
let mut map = HashMap::<(Profile, Scenario), Vec<Stats>>::new();
self.result_vec.iter().for_each(|r| {
if map.contains_key(&(r.profile, r.scenario)) {
Expand Down
13 changes: 12 additions & 1 deletion collector/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::{
use anyhow::{Context, Ok};
use benchmark::scenario::Scenario;
use clap::Parser;
use compile_time::bench_compile_time;
use compile_time::{bench_compile_time, binary_size::plotter::plot};
use runtime::bench_runtime;
use toolchain::{Cli, Commands, ResultWriter};

Expand Down Expand Up @@ -272,6 +272,17 @@ fn main_result() -> anyhow::Result<i32> {

Ok(0)
}
Commands::BinaryPlot {
data1,
data2,
data1_label,
data2_label,
profile,
out_path,
} => {
plot(data1, data2, data1_label, data2_label, out_path, profile)?;
Ok(0)
}
}
}

Expand Down
24 changes: 23 additions & 1 deletion collector/src/toolchain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use std::{
use anyhow::{Context, Ok};
use log::debug;

use crate::benchmark::profile::Profiles;
use crate::benchmark::profile::{Profile, Profiles};

#[derive(Debug, Copy, Clone)]
pub struct Compiler<'a> {
Expand Down Expand Up @@ -286,6 +286,28 @@ pub enum Commands {
out_dir: PathBuf,
},

/// Boxplot and scatterplot for a pair of compiled_binary_size data for comparison.
BinaryPlot {
/// The path of compiled_binary_size data 1
#[clap(long = "data1")]
data1: PathBuf,
/// The path of compiled_binary_size data 2
#[clap(long = "data2")]
data2: PathBuf,
/// The label of compiled_binary_size data 1
#[clap(long = "data1-label")]
data1_label: String,
/// The label of compiled_binary_size data 2
#[clap(long = "data2-label")]
data2_label: String,
/// The profile of data collected
#[clap(long = "profile")]
profile: Profile,
/// The path of output figure
#[clap(long = "out-path")]
out_path: PathBuf,
},

/// Transfer Json output to csv output.
TransferCsvOutput {
/// origin json output directory
Expand Down
Loading

0 comments on commit 2cfea37

Please sign in to comment.