Skip to content

Commit

Permalink
run host compilation calls through sccache so they can be distributed
Browse files Browse the repository at this point in the history
  • Loading branch information
trxcllnt committed Aug 23, 2024
1 parent 5e0c84c commit 2ac567a
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 41 deletions.
15 changes: 13 additions & 2 deletions src/compiler/c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1164,14 +1164,25 @@ impl<T: CommandCreatorSync, I: CCompilerImpl> Compilation<T> for CCompilation<I>
ref env_vars,
..
} = *self;
compiler.generate_compile_commands(

let (command, dist_command, cacheable) = compiler.generate_compile_commands(
path_transformer,
executable,
parsed_args,
cwd,
env_vars,
rewrite_includes_only,
)
)?;

let force_no_cache = env_vars
.iter()
.any(|(k, _v)| k.as_os_str() == "SCCACHE_NO_CACHE");

if force_no_cache {
Ok((command, dist_command, Cacheable::No))
} else {
Ok((command, dist_command, cacheable))
}
}

#[cfg(feature = "dist-client")]
Expand Down
76 changes: 51 additions & 25 deletions src/compiler/nvcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,16 +451,14 @@ impl CompileCommandImpl for NvccCompileCommand {
cwd,
} = self;

let mut env_vars = env_vars.to_vec();

let nvcc_subcommand_groups = group_nvcc_subcommands_by_compilation_stage(
creator,
executable,
arguments,
cwd,
temp_dir.as_path(),
keep_dir.clone(),
&mut env_vars,
env_vars,
)
.await?;

Expand Down Expand Up @@ -512,10 +510,11 @@ impl CompileCommandImpl for NvccCompileCommand {
nvcc_subcommand_groups[final_assembly_range].chunks(1),
] {
for command_groups in command_group_chunks {
let results = futures::future::join_all(command_groups.iter().map(|commands| {
run_nvcc_subcommands_group(service, creator, cwd, &env_vars, commands)
}))
.await;
let results =
futures::future::join_all(command_groups.iter().map(|commands| {
run_nvcc_subcommands_group(service, creator, cwd, commands)
}))
.await;

for result in results {
output = aggregate_output(output, result.unwrap_or_else(error_to_output));
Expand Down Expand Up @@ -547,6 +546,7 @@ pub struct NvccGeneratedSubcommand {
pub exe: PathBuf,
pub args: Vec<String>,
pub cwd: PathBuf,
pub env_vars: Vec<(OsString, OsString)>,
pub cacheable: Cacheable,
}

Expand All @@ -557,7 +557,7 @@ async fn group_nvcc_subcommands_by_compilation_stage<T>(
cwd: &Path,
tmp: &Path,
keep_dir: Option<PathBuf>,
env_vars: &mut Vec<(OsString, OsString)>,
env_vars: &[(OsString, OsString)],
) -> Result<Vec<Vec<NvccGeneratedSubcommand>>>
where
T: CommandCreatorSync,
Expand Down Expand Up @@ -587,7 +587,9 @@ where
// | sort -k 1n
// ```

let mut env_vars_copy = env_vars.to_vec();
let mut env_vars_1 = env_vars.to_vec();
let mut env_vars_2 = env_vars.to_vec();

let is_nvcc_exe =
|exe: &str| matches!(exe, "cicc" | "ptxas" | "cudafe++" | "nvlink" | "fatbinary");

Expand All @@ -597,7 +599,7 @@ where
creator,
executable,
cwd,
env_vars,
&mut env_vars_1,
keep_dir.is_none(),
arguments,
is_nvcc_exe,
Expand All @@ -607,22 +609,27 @@ where
creator,
executable,
cwd,
&mut env_vars_copy,
&mut env_vars_2,
keep_dir.is_none(),
&[arguments, &["--keep-dir".into(), tmp.into()][..]].concat(),
|exe| !is_nvcc_exe(exe),
),
)
.await?;

drop(env_vars_2);
let env_vars = env_vars_1;

// Now zip the two lists of commands again by sorting on original line index.
// Transform to tuples that include the dir in which each command should run.
let all_commands = nvcc_commands
.iter()
// Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `tmp`
.map(|(idx, exe, args)| (idx, tmp, exe, args))
.chain(
host_commands
.iter()
// Run host preprocessing and compilation steps in `cwd`
.map(|(idx, exe, args)| (idx, cwd, exe, args)),
)
.sorted_by(|a, b| Ord::cmp(&a.0, &b.0));
Expand All @@ -647,20 +654,18 @@ where
);
}

// * cicc and ptxas are cacheable
// * cudafe++ and fatbinary are not cacheable
// * Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `temp_dir`
// * Run host preprocessing and compilation steps in `cwd`
let cacheable = match exe.file_name().and_then(|s| s.to_str()) {
Some("cicc") | Some("ptxas") => Cacheable::Yes,
Some("cudafe++") | Some("nvlink") => Cacheable::No,
let (env_vars, cacheable) = match exe.file_name().and_then(|s| s.to_str()) {
// cicc and ptxas are cacheable
Some("cicc") | Some("ptxas") => (env_vars.clone(), Cacheable::Yes),
// cudafe++, nvlink, and fatbinary are not cacheable
Some("cudafe++") | Some("nvlink") => (env_vars.clone(), Cacheable::No),
Some("fatbinary") => {
// The fatbinary command represents the start of the last group
if !no_more_groups {
command_groups.push(vec![]);
}
no_more_groups = true;
Cacheable::No
(env_vars.clone(), Cacheable::No)
}
_ => {
// All generated host compiler commands include `-D__CUDA_ARCH_LIST__=`.
Expand All @@ -672,17 +677,37 @@ where
.any(|arg| arg.starts_with("-D__CUDA_ARCH_LIST__"))
{
continue;
} else if args.contains(&"-E".to_owned()) {
}
if args.contains(&"-E".to_owned()) {
// Each preprocessor step represents the start of a new command
// group, unless it comes after a call to fatbinary.
if !no_more_groups {
command_groups.push(vec![]);
}
// Do not run preprocessor calls through sccache
(env_vars.clone(), Cacheable::No)
} else {
// Use Cacheable::Yes so this host compiler call _is_ run through sccache
// (which allows it to be distributed). Here Cacheable::Yes only selects
// the sccache code path; it does not mean the output gets stored.
// Storing is suppressed by the SCCACHE_NO_CACHE env var appended below,
// which makes the inner compilation report Cacheable::No. The output
// file is already cached as the result of the outer `nvcc` command, so
// caching it here would store the same object twice under two different
// hashes, unnecessarily bloating the cache size.
(
env_vars
.iter()
.chain(
[
// Do not cache host compiler calls
("SCCACHE_NO_CACHE".into(), "true".into()),
]
.iter(),
)
.cloned()
.collect::<Vec<_>>(),
Cacheable::Yes,
)
}
// Do not cache host compiler calls, since the output will
// be cached as the result of the outer `nvcc` command.
// Caching this would just store the same object twice.
Cacheable::No
}
};

Expand All @@ -699,6 +724,7 @@ where
exe: exe.clone(),
args: args.clone(),
cwd: dir.into(),
env_vars,
cacheable,
});
}
Expand Down Expand Up @@ -933,7 +959,6 @@ async fn run_nvcc_subcommands_group<T>(
service: &server::SccacheService<T>,
creator: &T,
cwd: &Path,
env_vars: &[(OsString, OsString)],
commands: &[NvccGeneratedSubcommand],
) -> Result<process::Output>
where
Expand All @@ -950,6 +975,7 @@ where
exe,
args,
cwd,
env_vars,
cacheable,
} = cmd;

Expand Down
10 changes: 9 additions & 1 deletion src/compiler/rust.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,15 @@ impl<T: CommandCreatorSync> Compilation<T> for RustCompilation {
})
})();

Ok((CCompileCommand::new(command), dist_command, Cacheable::Yes))
let force_no_cache = env_vars
.iter()
.any(|(k, _v)| k.as_os_str() == "SCCACHE_NO_CACHE");

if force_no_cache {
Ok((CCompileCommand::new(command), dist_command, Cacheable::No))
} else {
Ok((CCompileCommand::new(command), dist_command, Cacheable::Yes))
}
}

#[cfg(feature = "dist-client")]
Expand Down
31 changes: 18 additions & 13 deletions tests/system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,12 +588,13 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile A request stats");
get_stats(|info| {
assert_eq!(1, info.stats.compile_requests);
assert_eq!(3, info.stats.requests_executed);
assert_eq!(4, info.stats.requests_executed);
assert_eq!(0, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(4, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
Expand Down Expand Up @@ -622,15 +623,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile A request stats");
get_stats(|info| {
assert_eq!(2, info.stats.compile_requests);
assert_eq!(4, info.stats.requests_executed);
assert_eq!(5, info.stats.requests_executed);
assert_eq!(1, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(4, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert!(info.stats.cache_hits.get("CUBIN").is_none());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
Expand Down Expand Up @@ -664,15 +666,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile B request stats");
get_stats(|info| {
assert_eq!(3, info.stats.compile_requests);
assert_eq!(7, info.stats.requests_executed);
assert_eq!(9, info.stats.requests_executed);
assert_eq!(2, info.stats.cache_hits.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(7, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert!(info.stats.cache_hits.get("PTX").is_none());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
Expand Down Expand Up @@ -705,15 +708,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile ptx request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(9, info.stats.requests_executed);
assert_eq!(11, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(6, info.stats.cache_misses.all());
assert_eq!(8, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
Expand Down Expand Up @@ -746,15 +750,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
trace!("compile cubin request stats");
get_stats(|info| {
assert_eq!(5, info.stats.compile_requests);
assert_eq!(12, info.stats.requests_executed);
assert_eq!(14, info.stats.requests_executed);
assert_eq!(5, info.stats.cache_hits.all());
assert_eq!(7, info.stats.cache_misses.all());
assert_eq!(9, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
let adv_cuda_key = adv_key_kind("cuda", compiler.name);
let adv_ptx_key = adv_key_kind("ptx", compiler.name);
let adv_cubin_key = adv_key_kind("cubin", compiler.name);
Expand Down Expand Up @@ -829,13 +834,13 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
trace!("request stats");
get_stats(|info| {
assert_eq!(4, info.stats.compile_requests);
assert_eq!(6, info.stats.requests_executed);
assert_eq!(8, info.stats.requests_executed);
assert_eq!(3, info.stats.cache_hits.all());
assert_eq!(3, info.stats.cache_misses.all());
assert_eq!(5, info.stats.cache_misses.all());
assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
assert_eq!(&3, info.stats.cache_misses.get("C/C++").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
});
Expand Down

0 comments on commit 2ac567a

Please sign in to comment.