run host compilation calls through sccache so they can be distributed

mozilla · Aug 23, 2024 · 2ac567a
1 parent 5e0c84c
commit 2ac567a
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 41 deletions.
diff --git a/src/compiler/c.rs b/src/compiler/c.rs
@@ -1164,14 +1164,25 @@ impl<T: CommandCreatorSync, I: CCompilerImpl> Compilation<T> for CCompilation<I>
             ref env_vars,
             ..
         } = *self;
-        compiler.generate_compile_commands(
+
+        let (command, dist_command, cacheable) = compiler.generate_compile_commands(
             path_transformer,
             executable,
             parsed_args,
             cwd,
             env_vars,
             rewrite_includes_only,
-        )
+        )?;
+
+        let force_no_cache = env_vars
+            .iter()
+            .any(|(k, _v)| k.as_os_str() == "SCCACHE_NO_CACHE");
+
+        if force_no_cache {
+            Ok((command, dist_command, Cacheable::No))
+        } else {
+            Ok((command, dist_command, cacheable))
+        }
     }
 
     #[cfg(feature = "dist-client")]

diff --git a/src/compiler/nvcc.rs b/src/compiler/nvcc.rs
@@ -451,16 +451,14 @@ impl CompileCommandImpl for NvccCompileCommand {
             cwd,
         } = self;
 
-        let mut env_vars = env_vars.to_vec();
-
         let nvcc_subcommand_groups = group_nvcc_subcommands_by_compilation_stage(
             creator,
             executable,
             arguments,
             cwd,
             temp_dir.as_path(),
             keep_dir.clone(),
-            &mut env_vars,
+            env_vars,
         )
         .await?;
 
@@ -512,10 +510,11 @@ impl CompileCommandImpl for NvccCompileCommand {
             nvcc_subcommand_groups[final_assembly_range].chunks(1),
         ] {
             for command_groups in command_group_chunks {
-                let results = futures::future::join_all(command_groups.iter().map(|commands| {
-                    run_nvcc_subcommands_group(service, creator, cwd, &env_vars, commands)
-                }))
-                .await;
+                let results =
+                    futures::future::join_all(command_groups.iter().map(|commands| {
+                        run_nvcc_subcommands_group(service, creator, cwd, commands)
+                    }))
+                    .await;
 
                 for result in results {
                     output = aggregate_output(output, result.unwrap_or_else(error_to_output));
@@ -547,6 +546,7 @@ pub struct NvccGeneratedSubcommand {
     pub exe: PathBuf,
     pub args: Vec<String>,
     pub cwd: PathBuf,
+    pub env_vars: Vec<(OsString, OsString)>,
     pub cacheable: Cacheable,
 }
 
@@ -557,7 +557,7 @@ async fn group_nvcc_subcommands_by_compilation_stage<T>(
     cwd: &Path,
     tmp: &Path,
     keep_dir: Option<PathBuf>,
-    env_vars: &mut Vec<(OsString, OsString)>,
+    env_vars: &[(OsString, OsString)],
 ) -> Result<Vec<Vec<NvccGeneratedSubcommand>>>
 where
     T: CommandCreatorSync,
@@ -587,7 +587,9 @@ where
     //   | sort -k 1n
     // ```
 
-    let mut env_vars_copy = env_vars.to_vec();
+    let mut env_vars_1 = env_vars.to_vec();
+    let mut env_vars_2 = env_vars.to_vec();
+
     let is_nvcc_exe =
         |exe: &str| matches!(exe, "cicc" | "ptxas" | "cudafe++" | "nvlink" | "fatbinary");
 
@@ -597,7 +599,7 @@ where
             creator,
             executable,
             cwd,
-            env_vars,
+            &mut env_vars_1,
             keep_dir.is_none(),
             arguments,
             is_nvcc_exe,
@@ -607,22 +609,27 @@ where
             creator,
             executable,
             cwd,
-            &mut env_vars_copy,
+            &mut env_vars_2,
             keep_dir.is_none(),
             &[arguments, &["--keep-dir".into(), tmp.into()][..]].concat(),
             |exe| !is_nvcc_exe(exe),
         ),
     )
     .await?;
 
+    drop(env_vars_2);
+    let env_vars = env_vars_1;
+
     // Now zip the two lists of commands again by sorting on original line index.
     // Transform to tuples that include the dir in which each command should run.
     let all_commands = nvcc_commands
         .iter()
+        // Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `tmp`
         .map(|(idx, exe, args)| (idx, tmp, exe, args))
         .chain(
             host_commands
                 .iter()
+                // Run host preprocessing and compilation steps in `cwd`
                 .map(|(idx, exe, args)| (idx, cwd, exe, args)),
         )
         .sorted_by(|a, b| Ord::cmp(&a.0, &b.0));
@@ -647,20 +654,18 @@ where
             );
         }
 
-        // * cicc and ptxas are cacheable
-        // * cudafe++ and fatbinary are not cacheable
-        // * Run cudafe++, nvlink, cicc, ptxas, and fatbinary in `temp_dir`
-        // * Run host preprocessing and compilation steps in `cwd`
-        let cacheable = match exe.file_name().and_then(|s| s.to_str()) {
-            Some("cicc") | Some("ptxas") => Cacheable::Yes,
-            Some("cudafe++") | Some("nvlink") => Cacheable::No,
+        let (env_vars, cacheable) = match exe.file_name().and_then(|s| s.to_str()) {
+            // cicc and ptxas are cacheable
+            Some("cicc") | Some("ptxas") => (env_vars.clone(), Cacheable::Yes),
+            // cudafe++, nvlink, and fatbinary are not cacheable
+            Some("cudafe++") | Some("nvlink") => (env_vars.clone(), Cacheable::No),
             Some("fatbinary") => {
                 // The fatbinary command represents the start of the last group
                 if !no_more_groups {
                     command_groups.push(vec![]);
                 }
                 no_more_groups = true;
-                Cacheable::No
+                (env_vars.clone(), Cacheable::No)
             }
             _ => {
                 // All generated host compiler commands include `-D__CUDA_ARCH_LIST__=`.
@@ -672,17 +677,37 @@ where
                     .any(|arg| arg.starts_with("-D__CUDA_ARCH_LIST__"))
                 {
                     continue;
-                } else if args.contains(&"-E".to_owned()) {
+                }
+                if args.contains(&"-E".to_owned()) {
                     // Each preprocessor step represents the start of a new command
                     // group, unless it comes after a call to fatbinary.
                     if !no_more_groups {
                         command_groups.push(vec![]);
                     }
+                    // Do not run preprocessor calls through sccache
+                    (env_vars.clone(), Cacheable::No)
+                } else {
+                    // Returns Cacheable::Yes to indicate we _do_ want to run this host
+                    // compiler call through sccache (because it may be distributed),
+                    // but we _do not_ want to cache its output. The output file will
+                    // be cached as the result of the outer `nvcc` command. Caching
+                    // here would store the same object twice under two different hashes,
+                    // unnecessarily bloating the cache size.
+                    (
+                        env_vars
+                            .iter()
+                            .chain(
+                                [
+                                    // Do not cache host compiler calls
+                                    ("SCCACHE_NO_CACHE".into(), "true".into()),
+                                ]
+                                .iter(),
+                            )
+                            .cloned()
+                            .collect::<Vec<_>>(),
+                        Cacheable::Yes,
+                    )
                 }
-                // Do not cache host compiler calls, since the output will
-                // be cached as the result of the outer `nvcc` command.
-                // Caching this would just store the same object twice.
-                Cacheable::No
             }
         };
 
@@ -699,6 +724,7 @@ where
                     exe: exe.clone(),
                     args: args.clone(),
                     cwd: dir.into(),
+                    env_vars,
                     cacheable,
                 });
             }
@@ -933,7 +959,6 @@ async fn run_nvcc_subcommands_group<T>(
     service: &server::SccacheService<T>,
     creator: &T,
     cwd: &Path,
-    env_vars: &[(OsString, OsString)],
     commands: &[NvccGeneratedSubcommand],
 ) -> Result<process::Output>
 where
@@ -950,6 +975,7 @@ where
             exe,
             args,
             cwd,
+            env_vars,
             cacheable,
         } = cmd;
 

diff --git a/src/compiler/rust.rs b/src/compiler/rust.rs
@@ -1746,7 +1746,15 @@ impl<T: CommandCreatorSync> Compilation<T> for RustCompilation {
             })
         })();
 
-        Ok((CCompileCommand::new(command), dist_command, Cacheable::Yes))
+        let force_no_cache = env_vars
+            .iter()
+            .any(|(k, _v)| k.as_os_str() == "SCCACHE_NO_CACHE");
+
+        if force_no_cache {
+            Ok((CCompileCommand::new(command), dist_command, Cacheable::No))
+        } else {
+            Ok((CCompileCommand::new(command), dist_command, Cacheable::Yes))
+        }
     }
 
     #[cfg(feature = "dist-client")]

diff --git a/tests/system.rs b/tests/system.rs
@@ -588,12 +588,13 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     trace!("compile A request stats");
     get_stats(|info| {
         assert_eq!(1, info.stats.compile_requests);
-        assert_eq!(3, info.stats.requests_executed);
+        assert_eq!(4, info.stats.requests_executed);
         assert_eq!(0, info.stats.cache_hits.all());
-        assert_eq!(3, info.stats.cache_misses.all());
+        assert_eq!(4, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
+        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
         let adv_cuda_key = adv_key_kind("cuda", compiler.name);
         let adv_ptx_key = adv_key_kind("ptx", compiler.name);
         let adv_cubin_key = adv_key_kind("cubin", compiler.name);
@@ -622,15 +623,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     trace!("compile A request stats");
     get_stats(|info| {
         assert_eq!(2, info.stats.compile_requests);
-        assert_eq!(4, info.stats.requests_executed);
+        assert_eq!(5, info.stats.requests_executed);
         assert_eq!(1, info.stats.cache_hits.all());
-        assert_eq!(3, info.stats.cache_misses.all());
+        assert_eq!(4, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
         assert!(info.stats.cache_hits.get("PTX").is_none());
         assert!(info.stats.cache_hits.get("CUBIN").is_none());
         assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
+        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
         let adv_cuda_key = adv_key_kind("cuda", compiler.name);
         let adv_ptx_key = adv_key_kind("ptx", compiler.name);
         let adv_cubin_key = adv_key_kind("cubin", compiler.name);
@@ -664,15 +666,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     trace!("compile B request stats");
     get_stats(|info| {
         assert_eq!(3, info.stats.compile_requests);
-        assert_eq!(7, info.stats.requests_executed);
+        assert_eq!(9, info.stats.requests_executed);
         assert_eq!(2, info.stats.cache_hits.all());
-        assert_eq!(5, info.stats.cache_misses.all());
+        assert_eq!(7, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
         assert!(info.stats.cache_hits.get("PTX").is_none());
         assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
         assert_eq!(&2, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
+        assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
         let adv_cuda_key = adv_key_kind("cuda", compiler.name);
         let adv_ptx_key = adv_key_kind("ptx", compiler.name);
         let adv_cubin_key = adv_key_kind("cubin", compiler.name);
@@ -705,15 +708,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     trace!("compile ptx request stats");
     get_stats(|info| {
         assert_eq!(4, info.stats.compile_requests);
-        assert_eq!(9, info.stats.requests_executed);
+        assert_eq!(11, info.stats.requests_executed);
         assert_eq!(3, info.stats.cache_hits.all());
-        assert_eq!(6, info.stats.cache_misses.all());
+        assert_eq!(8, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
         assert_eq!(&1, info.stats.cache_hits.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
         assert_eq!(&3, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
+        assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
         let adv_cuda_key = adv_key_kind("cuda", compiler.name);
         let adv_ptx_key = adv_key_kind("ptx", compiler.name);
         let adv_cubin_key = adv_key_kind("cubin", compiler.name);
@@ -746,15 +750,16 @@ fn test_nvcc_cuda_compiles(compiler: &Compiler, tempdir: &Path) {
     trace!("compile cubin request stats");
     get_stats(|info| {
         assert_eq!(5, info.stats.compile_requests);
-        assert_eq!(12, info.stats.requests_executed);
+        assert_eq!(14, info.stats.requests_executed);
         assert_eq!(5, info.stats.cache_hits.all());
-        assert_eq!(7, info.stats.cache_misses.all());
+        assert_eq!(9, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
         assert_eq!(&2, info.stats.cache_hits.get("PTX").unwrap());
         assert_eq!(&2, info.stats.cache_hits.get("CUBIN").unwrap());
         assert_eq!(&4, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&2, info.stats.cache_misses.get("PTX").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUBIN").unwrap());
+        assert_eq!(&2, info.stats.cache_misses.get("C/C++").unwrap());
         let adv_cuda_key = adv_key_kind("cuda", compiler.name);
         let adv_ptx_key = adv_key_kind("ptx", compiler.name);
         let adv_cubin_key = adv_key_kind("cubin", compiler.name);
@@ -829,13 +834,13 @@ fn test_nvcc_proper_lang_stat_tracking(compiler: Compiler, tempdir: &Path) {
     trace!("request stats");
     get_stats(|info| {
         assert_eq!(4, info.stats.compile_requests);
-        assert_eq!(6, info.stats.requests_executed);
+        assert_eq!(8, info.stats.requests_executed);
         assert_eq!(3, info.stats.cache_hits.all());
-        assert_eq!(3, info.stats.cache_misses.all());
+        assert_eq!(5, info.stats.cache_misses.all());
         assert_eq!(&1, info.stats.cache_hits.get("C/C++").unwrap());
         assert_eq!(&1, info.stats.cache_hits.get("CUDA").unwrap());
         assert_eq!(&1, info.stats.cache_hits.get("CUBIN").unwrap());
-        assert_eq!(&1, info.stats.cache_misses.get("C/C++").unwrap());
+        assert_eq!(&3, info.stats.cache_misses.get("C/C++").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("CUDA").unwrap());
         assert_eq!(&1, info.stats.cache_misses.get("PTX").unwrap());
     });