diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aedc4a..6084d1d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,7 @@ jobs: - nix-build-header - nix-build-multi-source - nix-build-shared-lib + - nix-build-dynamic-deps steps: - uses: actions/checkout@v4 - name: Setup Nix diff --git a/Cargo.lock b/Cargo.lock index fe45939..76c1240 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -368,8 +368,8 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "include-graph", "n2", + "regex", "shell-words", "tracing", "tracing-subscriber", @@ -734,6 +734,7 @@ dependencies = [ "regex", "serde", "serde_json", + "sha2", "shell-words", "walkdir", "which", diff --git a/README.md b/README.md index d34d688..6152bcd 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,8 @@ system that outputs ninja like CMake, meson, premake, gn, etc. ## Getting started -First you need to use [nix@d904921] and enable the following experimental -features: +First you need to use Nix 2.30 or later (newer than stable) and enable the +following experimental features: ```sh experimental-features = ["nix-command" "dynamic-derivations" "ca-derivations" "recursive-nix"] @@ -99,4 +99,3 @@ The source code developed for nix-ninja is licensed under MIT License. 
[dynamic-derivations]: docs/dynamic-derivations.md [milestones]: https://github.com/pdtpartners/nix-ninja/milestones [ninja-build]: https://ninja-build.org/ -[nix@d904921]: https://github.com/NixOS/nix/commit/d904921eecbc17662fef67e8162bd3c7d1a54ce0 diff --git a/crates/deps-infer/Cargo.toml b/crates/deps-infer/Cargo.toml index 645cc81..3a00ed5 100644 --- a/crates/deps-infer/Cargo.toml +++ b/crates/deps-infer/Cargo.toml @@ -8,8 +8,8 @@ license = "MIT" [dependencies] anyhow = "1.0" clap = { version = "4.5", features = ["derive"] } -include-graph = { git = "https://github.com/hinshun/igraph", branch = "performance-improvements" } n2 = { git = "https://github.com/hinshun/n2", branch = "feature/minimal-pub", default-features = false } +regex = "1" shell-words = "1.1.0" tracing = { version = "0.1" } tracing-subscriber = { version = "0.3.18", features = [ diff --git a/crates/deps-infer/src/c_include_parser.rs b/crates/deps-infer/src/c_include_parser.rs index f942482..4e57a78 100644 --- a/crates/deps-infer/src/c_include_parser.rs +++ b/crates/deps-infer/src/c_include_parser.rs @@ -1,16 +1,32 @@ use crate::gcc_include_parser; -use anyhow::Result; -use include_graph::dependencies::cparse; +use anyhow::{anyhow, Result}; +use regex::Regex; +use std::borrow::Borrow; +use std::collections::HashMap; use std::collections::{HashSet, VecDeque}; -use std::path::PathBuf; +use std::fmt::Debug; +use std::fs::canonicalize; +use std::fs::File; +use std::hash::Hash; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, LazyLock, RwLock}; -pub fn retrieve_c_includes(cmdline: &str, files: Vec) -> Result> { +pub fn retrieve_c_includes( + cmdline: &str, + files: Vec, + virtual_paths: Option>, +) -> Result> { let includes = gcc_include_parser::parse_include_dirs(cmdline)?; - bfs_parse_includes(files, &includes) + bfs_parse_includes(files, &includes, virtual_paths) } /// Recursively collect all dependencies using BFS -fn bfs_parse_includes(files: Vec, 
include_dirs: &[PathBuf]) -> Result> { +fn bfs_parse_includes( + files: Vec, + include_dirs: &[PathBuf], + virtual_paths: Option>, +) -> Result> { let mut visited = HashSet::new(); let mut result = Vec::new(); let mut queue = VecDeque::new(); @@ -29,9 +45,10 @@ fn bfs_parse_includes(files: Vec, include_dirs: &[PathBuf]) -> Result = queue.drain(..).collect(); // Process all files in the current batch in parallel - let sources_with_includes = cparse::all_sources_and_includes( + let sources_with_includes = all_sources_and_includes( current_batch.into_iter().map(Ok::<_, std::io::Error>), include_dirs, + virtual_paths.as_ref(), )?; // Process each source's includes @@ -47,3 +64,159 @@ fn bfs_parse_includes(files: Vec, include_dirs: &[PathBuf]) -> Result, +} + +/// Given a list of paths, figure out their dependencies +pub fn all_sources_and_includes( + paths: I, + includes: &[PathBuf], + virtual_paths: Option<&HashMap>, +) -> Result> +where + I: Iterator>, + E: Debug, +{ + let includes = Arc::new(Vec::from(includes)); + let virtual_paths = Arc::new(virtual_paths.cloned()); + let mut handles = Vec::new(); + + for entry in paths { + let path = match entry { + Ok(value) => canonicalize_cached(value.clone(), virtual_paths.as_ref().as_ref()) + .map_err(|e| anyhow!("{:?}", e))? 
+ .ok_or(anyhow!( + "Required file not found {}", + value.to_string_lossy() + ))?, + Err(e) => return Err(anyhow!("{:?}", e)), + }; + let includes = includes.clone(); + let virtual_paths = virtual_paths.clone(); + + handles.push(std::thread::spawn(move || { + let includes = match extract_includes(&path, &includes, virtual_paths.as_ref().as_ref()) + { + Ok(value) => value, + Err(e) => { + return Err(e); + } + }; + + Ok(SourceWithIncludes { path, includes }) + })); + } + + let mut results = Vec::new(); + for handle in handles { + let res = handle.join().map_err(|_| anyhow!("Join error"))?; + results.push(res?); + } + + Ok(results) +} + +static INCLUDE_REGEX: LazyLock = + LazyLock::new(|| Regex::new(r##"^\s*#\s*include\s*(["<])([^">]*)[">]"##).unwrap()); + +/// Given a C-like source, try to resolve includes. +/// +/// Includes are generally of the form `#include ` or `#include "name"` +pub fn extract_includes( + path: &PathBuf, + include_dirs: &[PathBuf], + virtual_paths: Option<&HashMap>, +) -> Result> { + let f = + File::open(path).map_err(|e| anyhow!("Failed to open file {}: {}", path.display(), e))?; + let reader = BufReader::new(f); + let mut result = Vec::new(); + let parent_dir = PathBuf::from(path.parent().unwrap()); + + let lines = reader.lines(); + + for line in lines { + let line = match line { + Ok(l) => l, + Err(_) => { + // Usually this means the file isn't UTF-8 and we can skip. 
+ return Ok(result); + } + }; + + if let Some(captures) = INCLUDE_REGEX.captures(&line) { + let inc_type = captures.get(1).unwrap().as_str(); + let relative_path = PathBuf::from(captures.get(2).unwrap().as_str()); + + if inc_type == "\"" { + if let Some(p) = try_resolve(&parent_dir, &relative_path, virtual_paths) { + result.push(p); + continue; + } + } + + if let Some(p) = include_dirs + .iter() + .find_map(|i| try_resolve(i, &relative_path, virtual_paths)) + { + result.push(p); + } + } + } + + Ok(result) +} + +fn try_resolve( + head: &Path, + tail: &Path, + virtual_paths: Option<&HashMap>, +) -> Option { + canonicalize_cached(head.join(tail), virtual_paths).ok()? +} + +type PathCache = Arc>>>; +static PATH_CACHE: LazyLock = LazyLock::new(Default::default); + +pub fn canonicalize_cached

( + path: P, + virtual_paths: Option<&HashMap>, +) -> Result, std::io::Error> +where + P: AsRef, + PathBuf: Borrow

, + P: Hash + Eq, +{ + // Check virtual paths first if provided + if let Some(virtual_paths) = virtual_paths { + for (build_path, actual_path) in virtual_paths { + if build_path.as_path() == path.as_ref() { + return Ok(Some(actual_path.clone())); + } + } + } + + { + // Then try the cache. + let cache = PATH_CACHE.read().unwrap(); + if let Some(cached) = cache.get(&path) { + return Ok(cached.clone()); + } + } + + // If cache-miss, then look it up ourselves. + let result = if path.as_ref().exists() { + Some(canonicalize(&path)?) + } else { + None + }; + + let mut cache = PATH_CACHE.write().unwrap(); + cache.insert(path.as_ref().to_path_buf(), result.clone()); + + Ok(result) +} diff --git a/crates/deps-infer/src/main.rs b/crates/deps-infer/src/main.rs index 82bba1c..05a2643 100644 --- a/crates/deps-infer/src/main.rs +++ b/crates/deps-infer/src/main.rs @@ -150,6 +150,7 @@ fn run_scan_mode(target: Target) -> Result<()> { let c_includes = c_include_parser::retrieve_c_includes( &target.cmdline, vec![target.filename.clone().into()], + None, )?; println!("C include parser method:"); for include in c_includes { @@ -177,6 +178,7 @@ fn run_benchmark_mode(targets: Vec) -> Result<()> { c_include_parser::retrieve_c_includes( &target.cmdline, vec![target.filename.clone().into()], + None, )?; } let c_duration = c_start.elapsed(); @@ -207,6 +209,7 @@ fn run_correctness_mode(targets: Vec) -> Result<()> { let mut c_includes = c_include_parser::retrieve_c_includes( &target.cmdline, vec![target.filename.clone().into()], + None, )?; c_includes = normalize_paths(c_includes, ¤t_dir); diff --git a/crates/nix-libstore/src/derivation.rs b/crates/nix-libstore/src/derivation.rs index 063b3ba..c34da70 100644 --- a/crates/nix-libstore/src/derivation.rs +++ b/crates/nix-libstore/src/derivation.rs @@ -1,4 +1,6 @@ -use anyhow::{anyhow, Result}; +use crate::derived_path::{SingleDerivedPath, SingleDerivedPathBuilt}; +use crate::store_path::StorePath; +use anyhow::Result; use serde::{Deserialize, 
Serialize, Serializer}; use std::collections::{HashMap, HashSet}; @@ -130,31 +132,12 @@ impl Derivation { self } - /// Add an environment variable - pub fn add_env(&mut self, key: &str, value: &str) -> &mut Self { + /// Set an environment variable + pub fn set_env(&mut self, key: &str, value: &str) -> &mut Self { self.env.insert(key.to_string(), value.to_string()); self } - /// Add an input source - pub fn add_input_src(&mut self, path: &str) -> &mut Self { - self.input_srcs.insert(path.to_string()); - self - } - - /// Add an input derivation - pub fn add_input_drv(&mut self, path: &str, outputs: Vec) -> &mut Self { - let input_drv = self - .input_drvs - .entry(path.to_string()) - .or_insert_with(|| InputDrv { - outputs: vec![], - dynamic_outputs: HashMap::new(), - }); - input_drv.outputs.extend(outputs); - self - } - /// Add an output pub fn add_output( &mut self, @@ -192,31 +175,6 @@ impl Derivation { self } - /// Add a dynamic output to an input derivation - pub fn add_dynamic_output( - &mut self, - drv_path: &str, - output_name: &str, - outputs: Vec, - ) -> Result<&mut Self> { - self.add_input_drv(drv_path, vec![]); - - let input_drv = self - .input_drvs - .get_mut(drv_path) - .ok_or_else(|| anyhow!("Input derivation not found: {}", drv_path))?; - - input_drv.dynamic_outputs.insert( - output_name.to_string(), - DynamicOutput { - outputs, - dynamic_outputs: HashMap::new(), - }, - ); - - Ok(self) - } - /// Serialize to JSON pub fn to_json(&self) -> Result { Ok(serde_json::to_string(self)?) @@ -231,6 +189,92 @@ impl Derivation { pub fn from_json(json: &str) -> Result { Ok(serde_json::from_str(json)?) 
} + + /// Add an input source + pub fn add_input_src(&mut self, store_path: &StorePath) -> &mut Self { + self.input_srcs.insert(store_path.to_string()); + self + } + + /// Add a derived path as input (either source or derivation) + pub fn add_derived_path(&mut self, derived_path: &SingleDerivedPath) -> &mut Self { + match derived_path { + SingleDerivedPath::Opaque(store_path) => { + // For opaque paths, add as input source + self.add_input_src(store_path); + } + SingleDerivedPath::Built(built) => { + self.add_input_built(built); + } + } + self + } + + /// Add a built derivation path as an input to this derivation + fn add_input_built(&mut self, built: &SingleDerivedPathBuilt) { + let drv_store_path = built.derived_path.store_path().to_string(); + let input_drv = self + .input_drvs + .entry(drv_store_path) + .or_insert_with(|| InputDrv { + outputs: vec![], + dynamic_outputs: HashMap::new(), + }); + + Self::add_built_nested( + &mut input_drv.outputs, + &mut input_drv.dynamic_outputs, + built, + ); + } + + /// Add a built path with potentially nested dynamic derivation structure + fn add_built_nested( + outputs: &mut Vec, + dynamic_outputs: &mut HashMap, + built: &SingleDerivedPathBuilt, + ) { + // Extract chain of output names from outermost to innermost + let mut chain = Vec::new(); + let mut current = built; + + loop { + chain.push(current.output.clone()); + match current.derived_path.as_ref() { + SingleDerivedPath::Opaque(_) => break, + SingleDerivedPath::Built(inner) => current = inner, + } + } + + // Reverse to process innermost to outermost + chain.reverse(); + + // Split into intermediate levels and final output + let Some((final_output, intermediate_levels)) = chain.split_last() else { + return; + }; + + // Navigate through intermediate levels, creating dynamic outputs + let mut current_outputs = outputs; + let mut current_dynamics = dynamic_outputs; + + for level in intermediate_levels { + let dynamic_output = + current_dynamics + .entry(level.clone()) + 
.or_insert_with(|| DynamicOutput { + outputs: vec![], + dynamic_outputs: HashMap::new(), + }); + current_outputs = &mut dynamic_output.outputs; + current_dynamics = &mut dynamic_output.dynamic_outputs; + } + + // Add final output + if !current_outputs.contains(final_output) { + current_outputs.push(final_output.clone()); + } + } } fn serialize_hashset_as_vec(set: &HashSet, serializer: S) -> Result @@ -264,9 +308,11 @@ where #[cfg(test)] mod tests { use super::*; + use crate::derived_path::SingleDerivedPathBuilt; + use crate::store_path::StorePath; #[test] - fn test_derivation_serialization() { + fn derivation_serialization() { // Create a basic derivation let mut drv = Derivation::new( "hello", @@ -277,7 +323,7 @@ mod tests { // Add some basic properties drv.add_arg("-c") .add_arg("echo Hello > $out") - .add_env( + .set_env( "PATH", "/nix/store/d1pzgj1pj3nk97vhm5x6n8szy4w3xhx7-coreutils/bin", ) @@ -298,52 +344,131 @@ mod tests { } #[test] - fn test_ca_derivation() { - // Create a content-addressed derivation + fn ca_derivation() { let mut drv = Derivation::new( "ca-example", "x86_64-linux", "/nix/store/w7jl0h7mwrrrcy2kgvk9c9h9142f1ca0-bash/bin/bash", ); - // Add a content-addressed output drv.add_ca_output("out", HashAlgorithm::Sha256, OutputHashMode::Nar); - // Serialize to JSON - let json = drv.to_json().unwrap(); - - // Check that it contains the content-addressed output properties - assert!(json.contains("sha256")); - assert!(json.contains("nar")); + let output = drv.outputs.get("out").unwrap(); + assert_eq!(output.hash_algo, Some(HashAlgorithm::Sha256)); + assert_eq!(output.method, Some(OutputHashMode::Nar)); + assert_eq!(output.hash, None); } #[test] - fn test_dynamic_derivation() { - // Create a derivation with dynamic outputs - let mut drv = Derivation::new( - "dynamic-example", - "x86_64-linux", - "/nix/store/w7jl0h7mwrrrcy2kgvk9c9h9142f1ca0-bash/bin/bash", - ); + fn add_opaque_path() { + let mut drv = Derivation::new("test", "x86_64-linux", "/bin/bash"); 
+ let store_path1 = sample_store_path(); + let store_path2 = + StorePath::new("/nix/store/zyxwvutsrqponmlkjihgfedcba987654-other").unwrap(); + let path1 = SingleDerivedPath::Opaque(store_path1.clone()); + let path2 = SingleDerivedPath::Opaque(store_path2.clone()); + + drv.add_derived_path(&path1); + drv.add_derived_path(&path2); + + assert!(drv.input_srcs.contains(&store_path1.to_string())); + assert!(drv.input_srcs.contains(&store_path2.to_string())); + assert!(drv.input_drvs.is_empty()); + } - // Add an input derivation - drv.add_input_drv( - "/nix/store/ac8da0sqpg4pyhzyr0qgl26d5dnpn7qp-ca-example.drv", - vec![], - ); + #[test] + fn add_built_path() { + let mut drv = Derivation::new("test", "x86_64-linux", "/bin/bash"); + let store_path = sample_store_path(); + let built1 = SingleDerivedPathBuilt::new(store_path.clone(), "out".to_string()); + let built2 = SingleDerivedPathBuilt::new(store_path.clone(), "dev".to_string()); + let path1 = SingleDerivedPath::Built(built1); + let path2 = SingleDerivedPath::Built(built2); + + drv.add_derived_path(&path1); + drv.add_derived_path(&path2); + + assert!(drv.input_srcs.is_empty()); + let input_drv = drv.input_drvs.get(&store_path.to_string()).unwrap(); + let mut outputs = input_drv.outputs.clone(); + outputs.sort(); + assert_eq!(outputs, vec!["dev", "out"]); + assert!(input_drv.dynamic_outputs.is_empty()); + } - // Add a dynamic output - drv.add_dynamic_output( - "/nix/store/ac8da0sqpg4pyhzyr0qgl26d5dnpn7qp-ca-example.drv", - "out", - vec!["out".to_string()], - ) - .unwrap(); + #[test] + fn add_multiple_dynamic_outputs() { + let mut drv = Derivation::new("test", "x86_64-linux", "/bin/bash"); + let store_path = sample_store_path(); + + // Add first dynamic derivation: store_path^inner^output1 + let inner1 = SingleDerivedPathBuilt::new(store_path.clone(), "inner".to_string()); + let inner_path1 = SingleDerivedPath::Built(inner1); + let outer1 = SingleDerivedPathBuilt::from_derived_path(inner_path1, "output1".to_string()); + 
let path1 = SingleDerivedPath::Built(outer1); + + drv.add_derived_path(&path1); + + // Check first dynamic path was added correctly + assert!(drv.input_srcs.is_empty()); + let input_drv = drv.input_drvs.get(&store_path.to_string()).unwrap(); + assert!(input_drv.outputs.is_empty()); + let dynamic_output = input_drv.dynamic_outputs.get("inner").unwrap(); + assert_eq!(dynamic_output.outputs, vec!["output1"]); + + // Add second dynamic derivation with same inner output: store_path^inner^output2 + let inner2 = SingleDerivedPathBuilt::new(store_path.clone(), "inner".to_string()); + let inner_path2 = SingleDerivedPath::Built(inner2); + let outer2 = SingleDerivedPathBuilt::from_derived_path(inner_path2, "output2".to_string()); + let path2 = SingleDerivedPath::Built(outer2); + + drv.add_derived_path(&path2); + + // Check aggregation: both outputs under same dynamic output + let input_drv = drv.input_drvs.get(&store_path.to_string()).unwrap(); + let dynamic_output = input_drv.dynamic_outputs.get("inner").unwrap(); + let mut outputs = dynamic_output.outputs.clone(); + outputs.sort(); + assert_eq!(outputs, vec!["output1", "output2"]); + assert!(dynamic_output.dynamic_outputs.is_empty()); + } - // Serialize to JSON - let json = drv.to_json().unwrap(); + #[test] + fn add_nested_dynamic_output() { + let mut drv = Derivation::new("test", "x86_64-linux", "/bin/bash"); + let store_path = sample_store_path(); + + // Create deeply nested structure: store_path^level1^level2^level3^output + let level1 = SingleDerivedPathBuilt::new(store_path.clone(), "level1".to_string()); + let level1_path = SingleDerivedPath::Built(level1); + let level2 = SingleDerivedPathBuilt::from_derived_path(level1_path, "level2".to_string()); + let level2_path = SingleDerivedPath::Built(level2); + let level3 = SingleDerivedPathBuilt::from_derived_path(level2_path, "level3".to_string()); + let level3_path = SingleDerivedPath::Built(level3); + let final_output = + 
SingleDerivedPathBuilt::from_derived_path(level3_path, "output".to_string()); + let path = SingleDerivedPath::Built(final_output); + + drv.add_derived_path(&path); + + // Should handle arbitrarily deep nesting + assert!(drv.input_srcs.is_empty()); + let input_drv = drv.input_drvs.get(&store_path.to_string()).unwrap(); + assert!(input_drv.outputs.is_empty()); + + // Navigate the nested structure + let level1_dynamic = input_drv.dynamic_outputs.get("level1").unwrap(); + assert!(level1_dynamic.outputs.is_empty()); + + let level2_dynamic = level1_dynamic.dynamic_outputs.get("level2").unwrap(); + assert!(level2_dynamic.outputs.is_empty()); + + let level3_dynamic = level2_dynamic.dynamic_outputs.get("level3").unwrap(); + assert_eq!(level3_dynamic.outputs, vec!["output"]); + assert!(level3_dynamic.dynamic_outputs.is_empty()); + } - // Check that it contains the dynamic outputs - assert!(json.contains("dynamicOutputs")); + fn sample_store_path() -> StorePath { + StorePath::new("/nix/store/abcdefghijklmnopqrstuvwxyz123456-test").unwrap() } } diff --git a/crates/nix-libstore/src/derived_path.rs b/crates/nix-libstore/src/derived_path.rs index 73d942d..a1f2091 100644 --- a/crates/nix-libstore/src/derived_path.rs +++ b/crates/nix-libstore/src/derived_path.rs @@ -14,7 +14,7 @@ impl SingleDerivedPath { pub fn store_path(&self) -> StorePath { match self { SingleDerivedPath::Opaque(store_path) => store_path.clone(), - SingleDerivedPath::Built(built_path) => built_path.drv_path.clone(), + SingleDerivedPath::Built(built_path) => built_path.derived_path.store_path(), } } } @@ -30,20 +30,104 @@ impl fmt::Display for SingleDerivedPath { /// A single derived path that is built from a derivation. /// Built derived paths are a pair of a derivation and an output name. +/// +/// The derivation itself can be either a store path (Opaque) or another built derivation (Built), +/// allowing for higher-order/nested dynamic derivations. 
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct SingleDerivedPathBuilt { - pub drv_path: StorePath, + pub derived_path: Box, pub output: String, } impl SingleDerivedPathBuilt { + /// Create a new SingleDerivedPathBuilt from a store path and output name + pub fn new(drv_path: StorePath, output: String) -> Self { + Self { + derived_path: Box::new(SingleDerivedPath::Opaque(drv_path)), + output, + } + } + + /// Create a new SingleDerivedPathBuilt from another SingleDerivedPath and output name + pub fn from_derived_path(drv_path: SingleDerivedPath, output: String) -> Self { + Self { + derived_path: Box::new(drv_path), + output, + } + } + pub fn placeholder(&self) -> PathBuf { - Placeholder::ca_output(&self.drv_path, &self.output).render() + self.placeholder_recursive().render() + } + + fn placeholder_recursive(&self) -> Placeholder { + match self.derived_path.as_ref() { + SingleDerivedPath::Opaque(store_path) => { + // Base case: regular ca_output placeholder + Placeholder::ca_output(store_path, &self.output) + } + SingleDerivedPath::Built(inner_built) => { + // Recursive case: create dynamic_output placeholder + let inner_placeholder = inner_built.placeholder_recursive(); + Placeholder::dynamic_output(&inner_placeholder, &self.output) + } + } } } impl fmt::Display for SingleDerivedPathBuilt { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}^{}", &self.drv_path, &self.output) + write!(f, "{}^{}", &self.derived_path, &self.output) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_store_path() -> StorePath { + StorePath::new("/nix/store/abcdefghijklmnopqrstuvwxyz123456-test").unwrap() + } + + #[test] + fn opaque_path() { + let store_path = sample_store_path(); + let path = SingleDerivedPath::Opaque(store_path.clone()); + + assert_eq!(path.store_path(), store_path); + assert_eq!(format!("{path}"), store_path.to_string()); + } + + #[test] + fn built_path() { + let store_path = sample_store_path(); + let built = 
SingleDerivedPathBuilt::new(store_path.clone(), "out".to_string()); + let path = SingleDerivedPath::Built(built); + + assert_eq!(path.store_path(), store_path); + assert_eq!(format!("{path}"), format!("{}^out", store_path)); + } + + #[test] + fn nested_path() { + let store_path = sample_store_path(); + + // Create inner derivation: store-path^inner + let inner_built = SingleDerivedPathBuilt::new(store_path.clone(), "inner".to_string()); + let inner_path = SingleDerivedPath::Built(inner_built); + + // Create outer derivation: (store-path^inner)^outer + let outer_built = + SingleDerivedPathBuilt::from_derived_path(inner_path, "outer".to_string()); + let outer_path = SingleDerivedPath::Built(outer_built); + + // The store_path should resolve to the innermost store path + assert_eq!(outer_path.store_path(), store_path); + + // The display should show the full nested structure + assert_eq!( + format!("{outer_path}"), + format!("{}^inner^outer", store_path) + ); } } diff --git a/crates/nix-libstore/src/store_path.rs b/crates/nix-libstore/src/store_path.rs index d0b5af9..e9c7bb6 100644 --- a/crates/nix-libstore/src/store_path.rs +++ b/crates/nix-libstore/src/store_path.rs @@ -3,7 +3,7 @@ use std::fmt; use std::path::PathBuf; /// A Nix store path -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct StorePath { /// The full path including the store directory path: PathBuf, diff --git a/crates/nix-ninja-task/src/derived_file.rs b/crates/nix-ninja-task/src/derived_file.rs index ecbd73e..7039ac9 100644 --- a/crates/nix-ninja-task/src/derived_file.rs +++ b/crates/nix-ninja-task/src/derived_file.rs @@ -2,18 +2,29 @@ use anyhow::{anyhow, Result}; use nix_libstore::derived_path::SingleDerivedPath; use nix_libstore::store_path::StorePath; use std::fmt; +use std::fs; +use std::os::unix::fs::symlink; use std::path::PathBuf; +/// Represents a file input or output for nix-ninja-task builds. 
+/// +/// DerivedFile describes how files are arranged in the build directory that nix-ninja-task +/// creates. The build directory contains symlinks that recreate the original source structure, +/// allowing builds to reference files using relative paths while the actual files come from +/// various Nix store locations. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct DerivedFile { - pub path: SingleDerivedPath, + pub derived_path: SingleDerivedPath, pub build_path: PathBuf, // Where file appears in build dir (symlink destination) pub rel_path: Option, // Where file appears within derived path (None for opaque) } impl DerivedFile { + /// Encodes this DerivedFile for passing from nix-ninja to nix-ninja-task. + /// + /// Format: `"::"` pub fn to_encoded(&self) -> String { - let path_str = match &self.path { + let path_str = match &self.derived_path { SingleDerivedPath::Opaque(store_path) => { store_path.path().to_string_lossy().to_string() } @@ -34,13 +45,15 @@ impl DerivedFile { ) } + /// Decodes a DerivedFile from the string format created by `to_encoded()`. + /// Used by nix-ninja-task to recreate build directory symlinks. 
pub fn from_encoded(encoded: &str) -> Result { let mut parts = encoded.split(':'); let store_path = StorePath::new(parts.next().ok_or_else(|| { anyhow!("Missing store path in encoded derived file: {encoded}") })?)?; - let path = SingleDerivedPath::Opaque(store_path); + let derived_path = SingleDerivedPath::Opaque(store_path); let build_path = PathBuf::from( parts .next() @@ -49,7 +62,7 @@ impl DerivedFile { let rel_path = parts.next().filter(|s| !s.is_empty()).map(PathBuf::from); Ok(DerivedFile { - path, + derived_path, build_path, rel_path, }) @@ -58,7 +71,7 @@ impl DerivedFile { impl fmt::Display for DerivedFile { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let base_path = match &self.path { + let base_path = match &self.derived_path { SingleDerivedPath::Opaque(store_path) => store_path.path().clone(), SingleDerivedPath::Built(built_path) => built_path.placeholder(), }; @@ -69,3 +82,58 @@ impl fmt::Display for DerivedFile { } } } + +impl From<&DerivedFile> for PathBuf { + fn from(df: &DerivedFile) -> Self { + let base_path = match &df.derived_path { + SingleDerivedPath::Opaque(store_path) => store_path.path().clone(), + SingleDerivedPath::Built(built_path) => built_path.placeholder(), + }; + if let Some(rel_path) = &df.rel_path { + base_path.join(rel_path) + } else { + base_path + } + } +} + +/// Creates symlinks for derived files under the specified prefix. +/// +/// For each derived file, creates a symlink at `prefix/${derived_file.build_path}` +/// pointing to the actual file at `derived_file.rel_path`. 
+pub fn create_symlinks( + prefix: &std::path::Path, + inputs: Vec, + overwrite: bool, +) -> Result<()> { + for input in inputs { + let source_path = input.to_string(); + let dest_path = prefix.join(&input.build_path); + + // Create parent directories if they don't exist + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent)?; + } + + if !std::path::Path::new(&source_path).exists() { + return Err(anyhow!( + "nix-ninja-task: symlink source does not exist: {source_path}" + )); + } + + if overwrite && dest_path.exists() { + fs::remove_file(&dest_path)?; + } + + symlink(&source_path, &dest_path).map_err(|e| { + anyhow!( + "Failed to create symlink from {} to {}: {}", + source_path, + dest_path.display(), + e + ) + })?; + } + + Ok(()) +} diff --git a/crates/nix-ninja-task/src/main.rs b/crates/nix-ninja-task/src/main.rs index dbfedbe..07dc945 100644 --- a/crates/nix-ninja-task/src/main.rs +++ b/crates/nix-ninja-task/src/main.rs @@ -1,11 +1,10 @@ -use anyhow::{anyhow, Result}; +use anyhow::Result; use clap::command; use clap::Parser; -use nix_ninja_task::derived_file::DerivedFile; +use nix_ninja_task::derived_file::{create_symlinks, DerivedFile}; use nix_ninja_task::patchelf; use std::env; use std::fs; -use std::os::unix::fs::symlink; use std::path::PathBuf; use std::process::{Command, Stdio}; @@ -60,13 +59,17 @@ fn main() -> Result<()> { // symlinked while preserving the original directory hierarchy of the // sources. This ensures relative includes and other path-dependent // references remain valid. - create_symlinks(&cli.build_dir, inputs)?; + create_symlinks(&cli.build_dir, inputs, false)?; println!( "nix-ninja-task: Setup source directory in {}", cli.build_dir.display() ); - create_parent_dirs(&outputs)?; + // Outputs are written to the same directory structure as the build + // directory because if the output is a shared library the filename must + // match the soname and it must be in a directory to add to the linking + // binary's RUNPATH. 
+ create_output_dirs(&outputs)?; if let Some(desc) = cli.description { println!("nix-ninja-task: {desc}"); @@ -74,7 +77,7 @@ fn main() -> Result<()> { // Spawn cmdline process via sh like ninja upstream does. println!("nix-ninja-task: Running: /bin/sh -c \"{}\"", cli.cmdline); - let exit_code = spawn_process(cli.cmdline)?; + let exit_code = spawn_process(&cli.cmdline)?; if exit_code != 0 { println!("nix-ninja-task: Failed with exit code {exit_code}"); std::process::exit(exit_code); @@ -103,40 +106,7 @@ fn main() -> Result<()> { Ok(()) } -/// Creates symlinks for derived files under the specified prefix. -/// -/// For each derived file, creates a symlink at `prefix/${derived_file.build_path}` -/// pointing to the actual file at `derived_file.path`. -fn create_symlinks(prefix: &std::path::Path, inputs: Vec) -> Result<()> { - for input in inputs { - let source_path = input.to_string(); - let dest_path = prefix.join(&input.build_path); - - // Create parent directories if they don't exist - if let Some(parent) = dest_path.parent() { - fs::create_dir_all(parent)?; - } - - if !std::path::Path::new(&source_path).exists() { - return Err(anyhow!( - "nix-ninja-task: symlink source does not exist: {source_path}" - )); - } - - symlink(&source_path, &dest_path).map_err(|e| { - anyhow!( - "Failed to create symlink from {} to {}: {}", - source_path, - dest_path.display(), - e - ) - })?; - } - - Ok(()) -} - -fn create_parent_dirs(outputs: &Vec) -> Result<()> { +fn create_output_dirs(outputs: &Vec) -> Result<()> { let mut dirs: Vec<&std::path::Path> = Vec::new(); for output in outputs { if let Some(parent) = output.build_path.parent() { @@ -150,9 +120,9 @@ fn create_parent_dirs(outputs: &Vec) -> Result<()> { Ok(()) } -fn spawn_process(cmdline: String) -> Result { +fn spawn_process(cmdline: &str) -> Result { let mut cmd = Command::new("/bin/sh"); - cmd.args(["-c", &cmdline]) + cmd.args(["-c", cmdline]) .stdout(Stdio::inherit()) .stderr(Stdio::inherit()) .envs(env::vars()); diff --git 
a/crates/nix-ninja/Cargo.toml b/crates/nix-ninja/Cargo.toml index 83aff6a..23a4578 100644 --- a/crates/nix-ninja/Cargo.toml +++ b/crates/nix-ninja/Cargo.toml @@ -17,6 +17,7 @@ nix-tool = { path = "../nix-tool" } regex = "1" serde = "1" serde_json = "1" +sha2 = "0.10" shell-words = "1.1.0" walkdir = "2" which = "7.0.2" diff --git a/crates/nix-ninja/src/build.rs b/crates/nix-ninja/src/build.rs index d8252d0..5933029 100644 --- a/crates/nix-ninja/src/build.rs +++ b/crates/nix-ninja/src/build.rs @@ -14,7 +14,7 @@ pub struct BuildConfig { pub build_dir: PathBuf, pub store_dir: PathBuf, pub nix_tool: String, - pub extra_inputs: Vec, + pub is_output_derivation: bool, } pub fn build( @@ -29,13 +29,7 @@ pub fn build( extra_args: Vec::new(), }); - let tools = task::Tools { - nix, - cc: task::which_store_path("cc")?, - coreutils: task::which_store_path("coreutils")?, - nix_ninja_task: task::which_store_path("nix-ninja-task")?, - patchelf: task::which_store_path("patchelf")?, - }; + let tools = task::Tools::new(nix)?; let mut runner = task::Runner::new( tools, @@ -43,10 +37,10 @@ pub fn build( system: "x86_64-linux".to_string(), build_dir: config.build_dir, store_dir: config.store_dir, + is_output_derivation: config.is_output_derivation, }, )?; runner.read_build_dir(&mut loader.graph.files)?; - runner.add_extra_inputs(&mut loader.graph.files, config.extra_inputs)?; let mut scheduler = Scheduler::new(&mut loader.graph, &mut runner); diff --git a/crates/nix-ninja/src/cli.rs b/crates/nix-ninja/src/cli.rs index a1a1826..c4d0e61 100644 --- a/crates/nix-ninja/src/cli.rs +++ b/crates/nix-ninja/src/cli.rs @@ -1,10 +1,15 @@ use crate::build::{self, BuildConfig}; +use crate::local; +use crate::subtool::dynamic_task; use anyhow::{anyhow, Result}; use clap::Parser; -use nix_libstore::store_path::StorePath; use nix_ninja_task::derived_file::DerivedFile; use nix_tool::{NixTool, StoreConfig}; -use std::{env, fs, os::unix::fs::symlink, path::PathBuf, str}; +use std::{ + env, fs, + 
path::{Path, PathBuf}, + str, +}; #[derive(Parser)] #[command( @@ -52,22 +57,6 @@ pub struct Cli { #[arg(long, default_value = "false", env = "NIX_NINJA_DRV", hide = true)] pub is_output_derivation: bool, - /// Until we dynamically create derivations that can infer C dependencies - /// on derivation outputs, we have this hack to inject additional inputs - /// that are inferred and source-linked into the nix-ninja-task - /// environment. - /// - /// For example, Nix uses Bison to generate a parser-tab.cc from a - /// .parser.y. The parser-tab.cc depends on finally.hh but we cannot - /// determine it during nix-ninja build-time, only at nix-ninja-task - /// build-time. - #[arg( - long = "extra-inputs", - env = "NIX_NINJA_EXTRA_INPUTS", - value_delimiter = ',' - )] - pub extra_inputs: Vec, - /// Target to build (only used with certain subtools) #[arg(trailing_var_arg = true)] pub targets: Vec, @@ -86,19 +75,31 @@ pub fn run() -> Result { if let Some(dir) = &cli.dir { std::env::set_current_dir(dir)?; } + let build_dir = std::env::current_dir()?; + + let nix_tool = NixTool::new(StoreConfig { + nix_tool: cli.nix_tool.clone(), + extra_args: Vec::new(), + }); // Handle subtool if specified if let Some(tool) = cli.tool.clone() { - return subtool(&cli, &tool); + return subtool( + nix_tool, + &build_dir, + cli.store_dir.as_path(), + &tool, + cli.targets.clone(), + ); } - match build(&cli) { + match build(&cli, &build_dir) { Ok(derived_file) => { if cli.is_output_derivation { let out = env::var("out").map_err(|_| anyhow!("Expected $out to be set"))?; - fs::copy(derived_file.path.store_path().path(), out)?; + fs::copy(derived_file.derived_path.store_path().path(), out)?; } else { - nix_build(&cli, &derived_file)?; + local::symlink_derived_files(&nix_tool, &build_dir, &[derived_file])?; } Ok(0) } @@ -109,13 +110,12 @@ pub fn run() -> Result { } } -fn build(cli: &Cli) -> Result { - let build_dir = std::env::current_dir()?; +fn build(cli: &Cli, build_dir: &Path) -> Result { let 
config = BuildConfig { - build_dir, + build_dir: build_dir.to_path_buf(), store_dir: cli.store_dir.clone(), nix_tool: cli.nix_tool.clone(), - extra_inputs: cli.extra_inputs.clone(), + is_output_derivation: cli.is_output_derivation, }; build::build( @@ -125,47 +125,29 @@ fn build(cli: &Cli) -> Result { ) } -fn nix_build(cli: &Cli, derived_file: &DerivedFile) -> Result<()> { - let nix = NixTool::new(StoreConfig { - nix_tool: cli.nix_tool.clone(), - extra_args: Vec::new(), - }); - - let output = nix.build(&derived_file.path)?; - let stdout = str::from_utf8(&output.stdout)?; - let drv_output = StorePath::new(stdout.trim())?; - - let symlink_source = if let Some(rel_path) = &derived_file.rel_path { - drv_output.path().join(rel_path) - } else { - drv_output.path().to_path_buf() - }; - - if derived_file.build_path.exists() { - fs::remove_file(&derived_file.build_path)?; - } - symlink(symlink_source, &derived_file.build_path)?; - - Ok(()) -} - -fn subtool(cli: &Cli, tool: &str) -> Result { - match tool { +fn subtool( + nix_tool: NixTool, + build_dir: &Path, + store_dir: &Path, + subtool_name: &str, + targets: Vec, +) -> Result { + match subtool_name { "list" => { println!("nix-ninja subtools:"); - println!(" drv show Nix derivation generated for a target"); + println!(" drv show Nix derivation generated for a target"); + println!(" dynamic-task generate task derivation from task + discovered deps"); } "drv" => { - let nix = NixTool::new(StoreConfig { - nix_tool: cli.nix_tool.clone(), - extra_args: Vec::new(), - }); - - let derived_file = build(cli)?; - let output = nix.derivation_show(&derived_file.path.store_path())?; + let cli = Cli::parse(); + let derived_file = build(&cli, build_dir)?; + let output = nix_tool.derivation_show(&derived_file.derived_path.store_path())?; let stdout = str::from_utf8(&output.stdout)?; println!("{stdout}"); } + "dynamic-task" => { + return Ok(dynamic_task_subtool(nix_tool, store_dir, targets)); + } // Meson compatibility tools. 
"restat" | "clean" | "cleandead" | "compdb" => { // TODO: Implement what's necessary, I think only compdb needs to @@ -173,10 +155,20 @@ fn subtool(cli: &Cli, tool: &str) -> Result { } _ => { println!( - "Unknown subtool '{tool}'. Use '-t list' to get a list of available subtools." + "Unknown subtool '{subtool_name}'. Use '-t list' to get a list of available subtools." ); return Ok(1); } } Ok(0) } + +fn dynamic_task_subtool(nix_tool: NixTool, store_dir: &Path, targets: Vec) -> i32 { + match dynamic_task::run(nix_tool, store_dir, targets) { + Ok(code) => code, + Err(err) => { + eprintln!("nix-ninja-dynamic-task: {err}"); + 1 + } + } +} diff --git a/crates/nix-ninja/src/lib.rs b/crates/nix-ninja/src/lib.rs index 55b9d8d..614157b 100644 --- a/crates/nix-ninja/src/lib.rs +++ b/crates/nix-ninja/src/lib.rs @@ -1,4 +1,6 @@ mod build; pub mod cli; +mod local; mod relative_from; +mod subtool; mod task; diff --git a/crates/nix-ninja/src/local.rs b/crates/nix-ninja/src/local.rs new file mode 100644 index 0000000..e959cf7 --- /dev/null +++ b/crates/nix-ninja/src/local.rs @@ -0,0 +1,62 @@ +use anyhow::Result; +use nix_libstore::prelude::SingleDerivedPath; +use nix_ninja_task::derived_file::{create_symlinks, DerivedFile}; +use nix_tool::NixTool; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +pub fn build_derived_files( + nix_tool: &NixTool, + derived_files: &[DerivedFile], +) -> Result> { + let derived_paths: Vec<_> = derived_files + .iter() + .map(|df| df.derived_path.clone()) + .collect(); + + // Build derived paths so the Nix store paths exist on the host. 
+ let store_paths = nix_tool.build(&derived_paths)?; + + // Create mapping from build_path to actual store path + let built_paths: HashMap = derived_files + .iter() + .zip(store_paths.iter()) + .map(|(df, store_path)| { + let actual_path = if let Some(rel_path) = &df.rel_path { + store_path.path().join(rel_path) + } else { + store_path.path().to_path_buf() + }; + (df.build_path.clone(), actual_path) + }) + .collect(); + + Ok(built_paths) +} + +pub fn symlink_derived_files( + nix_tool: &NixTool, + prefix: &Path, + derived_files: &[DerivedFile], +) -> Result<()> { + let derived_paths: Vec<_> = derived_files + .iter() + .map(|df| df.derived_path.clone()) + .collect(); + let store_paths = nix_tool.build(&derived_paths)?; + + // Create new DerivedFiles with opaque store paths instead of placeholders + let opaque_files: Vec = derived_files + .iter() + .zip(store_paths.iter()) + .map(|(df, store_path)| DerivedFile { + derived_path: SingleDerivedPath::Opaque(store_path.clone()), + build_path: df.build_path.clone(), + rel_path: df.rel_path.clone(), + }) + .collect(); + + create_symlinks(prefix, opaque_files, true)?; + + Ok(()) +} diff --git a/crates/nix-ninja/src/subtool/dynamic_task.rs b/crates/nix-ninja/src/subtool/dynamic_task.rs new file mode 100644 index 0000000..6cadb3d --- /dev/null +++ b/crates/nix-ninja/src/subtool/dynamic_task.rs @@ -0,0 +1,186 @@ +use anyhow::{anyhow, Result}; +use nix_libstore::prelude::{Derivation, StorePath}; +use nix_ninja_task::derived_file::DerivedFile; +use nix_tool::NixTool; +use std::{ + collections::{HashMap, HashSet}, + env, fs, + path::{Path, PathBuf}, +}; + +use crate::task::discover_c_includes; + +pub fn run(nix_tool: NixTool, store_dir: &Path, targets: Vec) -> Result { + let input_drv = targets + .first() + .ok_or_else(|| anyhow!("Expected derivation path as argument"))?; + + let drv_json = fs::read_to_string(input_drv)?; + let mut drv = Derivation::from_json(&drv_json)?; + println!("nix-ninja-dynamic-task: Processing derivation 
{}", drv.name); + + // Stage 1: Prepare build environment + let (build_dir, built_paths) = prepare_build_environment()?; + + // Stage 2: Discover dynamic dependencies + let (discovered_deps, discovered_store_paths) = + discover_dynamic_dependencies(&nix_tool, store_dir, &build_dir, &drv, built_paths)?; + + // Stage 3: Update derivation with discovered dependencies + let new_deps = + update_derivation_with_discoveries(&mut drv, discovered_deps, discovered_store_paths)?; + + // Print discovery results + if !new_deps.is_empty() { + for dep in &new_deps { + println!( + "nix-ninja-dynamic-task: Discovered dependency: {}", + dep.derived_path.store_path() + ); + } + } else { + println!("nix-ninja-dynamic-task: No new dependencies discovered"); + } + + let drv_path = nix_tool.derivation_add(&drv)?; + let out = env::var("out").map_err(|_| anyhow!("Expected $out to be set"))?; + fs::copy(drv_path.path(), out)?; + + println!("nix-ninja-dynamic-task: Added derivation to store: {drv_path}"); + Ok(0) +} + +/// Stage 1: Prepare build environment by setting up directories, copying source, +/// and building derived files +fn prepare_build_environment() -> Result<(PathBuf, HashMap)> { + // Set up build directory using NIX_BUILD_TOP + let build_top = + env::var("NIX_BUILD_TOP").map_err(|_| anyhow!("Expected $NIX_BUILD_TOP to be set"))?; + let source_dir = PathBuf::from(build_top).join("source"); + let build_dir = source_dir.join("build"); + fs::create_dir_all(&build_dir)?; + env::set_current_dir(&build_dir)?; + + // Copy $src into source_dir so we can discover dependencies from $src. + let src = env::var("src").map_err(|_| anyhow!("Expected $src to be set"))?; + copy_dir_all(PathBuf::from(src), &source_dir)?; + + // Get NIX_NINJA_INPUTS from process environment, these are the built + // inputs to a derivation that may have discovered inputs and should be + // scanned. 
+ let inputs = env::var("NIX_NINJA_INPUTS") + .map_err(|_| anyhow!("NIX_NINJA_INPUTS not found in process environment"))?; + + // Get built inputs for dynamic dependency discovery + let derived_files: Vec = inputs + .split_whitespace() + .filter_map(|encoded| DerivedFile::from_encoded(encoded).ok()) + .collect(); + + // In derivation mode, built files are already available as store paths + // Create the virtual paths mapping from the derived files + let built_paths: HashMap = derived_files + .iter() + .map(|df| (df.build_path.clone(), PathBuf::from(df))) + .collect(); + + Ok((build_dir, built_paths)) +} + +/// Stage 2: Discover dynamic dependencies by analyzing built files for includes +pub fn discover_dynamic_dependencies( + nix_tool: &NixTool, + store_dir: &Path, + build_dir: &Path, + drv: &Derivation, + built_paths: HashMap, +) -> Result<(Vec, Vec)> { + let cmdline = drv + .args + .first() + .ok_or_else(|| anyhow!("No command line found in derivation"))?; + + let files: Vec = built_paths.keys().cloned().collect(); + + discover_c_includes( + nix_tool, + store_dir, + build_dir, + cmdline, + files, + Some(built_paths), + ) +} + +/// Stage 3: Update derivation with discovered dependencies and store paths +/// Returns the list of new dependencies that were added +pub fn update_derivation_with_discoveries( + drv: &mut Derivation, + discovered_deps: Vec, + discovered_store_paths: Vec, +) -> Result> { + for store_path in &discovered_store_paths { + drv.add_input_src(store_path); + } + + // Get NIX_NINJA_INPUTS from derivation environment, these are the existing + // inputs of the derivation without the discovered inputs. 
+ let drv_inputs = drv.env.get("NIX_NINJA_INPUTS").map_or("", |v| v); + + // Parse existing derivation inputs into a HashSet for deduplication + let mut input_set: HashSet = drv_inputs + .split_whitespace() + .map(|s| s.to_string()) + .collect(); + + let mut new_deps = Vec::new(); + for derived_file in discovered_deps { + let encoded = derived_file.to_encoded(); + + // Skip if already in input set + if input_set.contains(&encoded) { + continue; + } + + new_deps.push(derived_file.clone()); + input_set.insert(encoded); + drv.add_derived_path(&derived_file.derived_path); + } + + if !new_deps.is_empty() { + // Update NIX_NINJA_INPUTS with sorted list + let mut inputs: Vec = input_set.into_iter().collect(); + inputs.sort(); + drv.set_env("NIX_NINJA_INPUTS", &inputs.join(" ")); + } + + Ok(new_deps) +} + +/// Recursively copies a directory and all its contents +fn copy_dir_all(src: PathBuf, dst: &Path) -> Result<()> { + use std::os::unix::fs::symlink; + use walkdir::WalkDir; + + for entry in WalkDir::new(&src) { + let entry = entry?; + + let relative_path = entry.path().strip_prefix(&src)?; + let dest_path = dst.join(relative_path); + + if let Some(parent) = dest_path.parent() { + fs::create_dir_all(parent)?; + } + + let file_type = entry.file_type(); + if file_type.is_dir() { + fs::create_dir_all(&dest_path)?; + } else if file_type.is_symlink() { + let target = fs::read_link(entry.path())?; + symlink(target, dest_path)?; + } else { + fs::copy(entry.path(), dest_path)?; + } + } + Ok(()) +} diff --git a/crates/nix-ninja/src/subtool/mod.rs b/crates/nix-ninja/src/subtool/mod.rs new file mode 100644 index 0000000..6221305 --- /dev/null +++ b/crates/nix-ninja/src/subtool/mod.rs @@ -0,0 +1 @@ +pub mod dynamic_task; diff --git a/crates/nix-ninja/src/task.rs b/crates/nix-ninja/src/task.rs index ec2c350..dc1d240 100644 --- a/crates/nix-ninja/src/task.rs +++ b/crates/nix-ninja/src/task.rs @@ -1,4 +1,6 @@ +use crate::local; use crate::relative_from::relative_from; +use 
crate::subtool::dynamic_task; use anyhow::{anyhow, Error, Result}; use deps_infer::c_include_parser; use n2::{ @@ -9,38 +11,56 @@ use nix_libstore::prelude::*; use nix_ninja_task::derived_file::DerivedFile; use nix_tool::NixTool; use regex::Regex; +use sha2::{Digest, Sha256}; use std::{ collections::{HashMap, HashSet}, env, fs, ops::Deref, - path::PathBuf, - sync::mpsc, + path::{Path, PathBuf}, + sync::{mpsc, Arc, Mutex}, }; use walkdir::WalkDir; use which::which; #[derive(Clone)] pub struct Tools { - pub nix: NixTool, pub cc: StorePath, pub coreutils: StorePath, + pub nix: StorePath, + pub nix_tool: NixTool, + pub nix_ninja: StorePath, pub nix_ninja_task: StorePath, pub patchelf: StorePath, } +impl Tools { + pub fn new(nix_tool: NixTool) -> Result { + Ok(Tools { + cc: which_store_path("cc")?, + coreutils: which_store_path("coreutils")?, + nix: which_store_path("nix")?, + nix_tool, + nix_ninja: which_store_path("nix-ninja")?, + nix_ninja_task: which_store_path("nix-ninja-task")?, + patchelf: which_store_path("patchelf")?, + }) + } +} + /// Task represents a fully evaluated Ninja build target. /// /// A task contains all the context to generate a Nix derivation for the build /// target. +#[derive(Clone)] struct Task { name: String, system: String, - env_vars: HashMap, + wrapper_vars: HashMap, + input_srcs: Vec, build_dir: PathBuf, build_deps: BuildDependencies, store_dir: PathBuf, - store_regex: Regex, cmdline: Option, desc: Option, @@ -62,28 +82,32 @@ impl Deref for Task { /// BuildResult is the output of a Task. pub struct BuildResult { pub bid: BuildId, + pub derived_path: Option, pub derived_files: Vec, pub err: Option, } +#[derive(Clone)] pub struct RunnerConfig { pub system: String, pub build_dir: PathBuf, pub store_dir: PathBuf, + pub is_output_derivation: bool, } /// Runner is an async runtime that spawns threads for each task. 
pub struct Runner { pub derived_files: HashMap, build_dir_inputs: HashMap, - extra_inputs: HashMap>, tx: mpsc::Sender, rx: mpsc::Receiver, tools: Tools, config: RunnerConfig, - env_vars: HashMap, + wrapper_vars: HashMap, + wrapper_store_paths: Vec, store_regex: Regex, + nix_build_lock: Arc>, } impl Runner { @@ -95,22 +119,42 @@ impl Runner { ); let store_regex = Regex::new(&pattern)?; - let mut env_vars = HashMap::new(); + let mut wrapper_vars = HashMap::new(); for (key, value) in env::vars() { - env_vars.insert(key, value); + if ["NIX_LDFLAGS", "NIX_CFLAGS_COMPILE"].contains(&key.as_str()) + || key.starts_with("NIX_CC_WRAPPER") + || key.starts_with("NIX_BINTOOLS_WRAPPER") + { + wrapper_vars.insert(key, value); + } + } + + // Remove -frandom-seed from NIX_CFLAGS_COMPILE as we'll calculate it + // per task derivation. Otherwise this will be different every time + // breaking incrementality. + if let Some(cflags) = wrapper_vars.get_mut("NIX_CFLAGS_COMPILE") { + *cflags = remove_frandom_seed(cflags); + } + + // Extract store paths from wrapper variables once + let mut wrapper_store_paths = Vec::new(); + for value in wrapper_vars.values() { + let found_store_paths = extract_store_paths(&store_regex, value)?; + wrapper_store_paths.extend(found_store_paths); } let (tx, rx) = mpsc::channel(); Ok(Runner { derived_files: HashMap::new(), build_dir_inputs: HashMap::new(), - extra_inputs: HashMap::new(), tx, rx, tools, config, - env_vars, + wrapper_vars, + wrapper_store_paths, store_regex, + nix_build_lock: Arc::new(Mutex::new(())), }) } @@ -133,60 +177,13 @@ impl Runner { let path = entry.into_path(); let derived_file = - new_opaque_file(&self.tools.nix, &self.config.build_dir, path.clone())?; + new_opaque_file(&self.tools.nix_tool, &self.config.build_dir, path.clone())?; let fid = self.add_derived_file(files, derived_file.clone()); self.build_dir_inputs.insert(fid, derived_file); } Ok(()) } - pub fn add_extra_inputs( - &mut self, - files: &mut graph::GraphFiles, - 
encoded_inputs: Vec, - ) -> Result<()> { - for encoded in encoded_inputs { - // Split by colon to separate path from source - let parts: Vec<&str> = encoded.split(':').collect(); - if parts.len() != 2 { - return Err(anyhow!( - "Expected one ':' in encoded input but got {}", - encoded - )); - } - let (target, extra_input_path) = (parts[0], PathBuf::from(parts[1])); - - let Some(fid) = files.lookup(target) else { - return Err(anyhow!("Could not find target in extra input: {}", target)); - }; - - let file = &files.by_id[fid]; - let Some(bid) = file.input else { - return Err(anyhow!( - "Target in extra input is not an output of a build: {}", - target - )); - }; - - let mut extra_inputs = match self.extra_inputs.get(&bid) { - Some(extra_inputs) => extra_inputs.to_owned(), - None => Vec::new(), - }; - - let derived_file = new_opaque_file( - &self.tools.nix, - &self.config.build_dir, - extra_input_path.clone(), - )?; - self.add_derived_file(files, derived_file.clone()); - - extra_inputs.push(derived_file); - self.extra_inputs.insert(bid, extra_inputs); - } - - Ok(()) - } - pub fn start( &mut self, files: &mut graph::GraphFiles, @@ -196,16 +193,43 @@ impl Runner { let tx = self.tx.clone(); let tools = self.tools.clone(); - let task = self.new_task(files, bid, build)?; + let task = self.new_task(files, build)?; + let config = self.config.clone(); + let nix_build_lock = self.nix_build_lock.clone(); std::thread::spawn(move || { - let (derived_files, err) = match build_task_derivation(tools, task) { - Ok(derived_files) => (derived_files, None), - Err(err) => (Vec::new(), Some(err)), + let (derived_path, err) = + match build_task_derivation(tools.clone(), task.clone()) { + Ok(drv) => match handle_derivation_result( + tools.clone(), + task.clone(), + drv.clone(), + &config, + nix_build_lock, + ) { + Ok(final_derived_path) => (Some(final_derived_path), None), + Err(err) => (None, Some(err.context(format!("Failed to handle derivation result for task '{}' (derivation: 
{})\nDerivation JSON:\n{}", task.name, drv.name, drv.to_json_pretty().unwrap_or_else(|_| "Failed to serialize derivation".to_string()))))), + }, + Err(err) => (None, Some(err.context(format!("Failed to build task derivation for task '{}'", task.name)))), + }; + + // Create DerivedFiles for all outputs if successful + let derived_files = if let Some(ref final_derived_path) = derived_path { + let mut drv_outputs: Vec = Vec::new(); + for fid in task.outs() { + let file = &task.files[fid]; + let built_file = + new_built_file(final_derived_path.clone(), file.name.clone().into()); + drv_outputs.push(built_file); + } + drv_outputs + } else { + Vec::new() }; let result = BuildResult { bid, + derived_path, derived_files, err, }; @@ -226,9 +250,16 @@ impl Runner { } eprintln!("Backtrace: {}", err.backtrace()); + + let debug_info = if let Some(derived_path) = &result.derived_path { + format!("derivation: {derived_path}") + } else { + format!("build_id: {:?}", result.bid) + }; + return Err(anyhow!( - "Failed to build task derivation for {:?}: {}", - result.bid, + "Failed to build task derivation for {}: {}", + debug_info, err )); } @@ -256,12 +287,7 @@ impl Runner { fid } - fn new_task( - &mut self, - files: &mut graph::GraphFiles, - bid: BuildId, - build: &Build, - ) -> Result { + fn new_task(&mut self, files: &mut graph::GraphFiles, build: &Build) -> Result { let store_dir = self.config.store_dir.to_string_lossy().into_owned(); // Provide the task access to all the original files for explicit @@ -293,7 +319,7 @@ impl Runner { } let input = new_opaque_file( - &self.tools.nix, + &self.tools.nix_tool, &self.config.build_dir, file.name.clone().into(), )?; @@ -354,23 +380,24 @@ impl Runner { input_set.insert(input.build_path.clone(), input.clone()); } - if let Some(extra_inputs) = self.extra_inputs.get(&bid) { - for input in extra_inputs { - input_set.insert(input.build_path.clone(), input.clone()); - } - } - let mut inputs: Vec = input_set.into_values().collect(); inputs.sort(); 
+ // Extract store paths from cmdline and add pre-extracted wrapper store paths + let mut input_srcs = self.wrapper_store_paths.clone(); + if let Some(cmdline) = &build.cmdline { + let found_store_paths = extract_store_paths(&self.store_regex, cmdline)?; + input_srcs.extend(found_store_paths); + } + Ok(Task { name: format!("ninja-build-{name}"), system: self.config.system.clone(), - env_vars: self.env_vars.clone(), + wrapper_vars: self.wrapper_vars.clone(), + input_srcs, build_dir: self.config.build_dir.clone(), build_deps: build.dependencies.clone(), store_dir: self.config.store_dir.clone(), - store_regex: self.store_regex.clone(), cmdline: build.cmdline.clone(), desc: build.desc.clone(), deps: build.deps.clone(), @@ -381,11 +408,11 @@ impl Runner { } } -fn build_task_derivation(tools: Tools, task: Task) -> Result> { +fn build_task_derivation(tools: Tools, task: Task) -> Result { let cmdline = match &task.cmdline { Some(c) => c, None => { - return process_phony(tools, task); + return Err(anyhow!("Phony tasks not yet supported")); } }; @@ -400,35 +427,35 @@ fn build_task_derivation(tools: Tools, task: Task) -> Result> { drv.add_arg(&format!("--description={desc}")); } - // Propagate env var from build environment to the task. - for (key, value) in &task.env_vars { - // TODO: Currently necessary because we're using a gcc wrapped by - // nixpkgs that has implicit deps inside env vars like NIX_LDFLAGS, - // NIX_CFLAGS_COMPILE. Is there a better way? - if !["NIX_LDFLAGS", "NIX_CFLAGS_COMPILE"].contains(&key.as_str()) - && !key.starts_with("NIX_CC_WRAPPER") - { - continue; - } + // Propagate wrapper environment variables to the task. + for (key, value) in &task.wrapper_vars { + let final_value = if key == "NIX_CFLAGS_COMPILE" { + // Also add a deterministic random seed based on the task's + // cmdline for reproducible builds. 
+ let deterministic_seed = generate_frandom_seed(cmdline); + format!("{value} -frandom-seed={deterministic_seed}") + } else { + value.clone() + }; + drv.set_env(key, &final_value); + } - drv.add_env(key, value); - let found_store_paths = extract_store_paths(&task.store_regex, value)?; - for store_path in found_store_paths { - drv.add_input_src(&store_path.to_string()); - } + // Add pre-extracted store paths from cmdline and wrapper vars + for store_path in &task.input_srcs { + drv.add_input_src(store_path); } // Needed by all tasks. - drv.add_input_src(&tools.cc.to_string()) - .add_input_src(&tools.coreutils.to_string()) - .add_input_src(&tools.nix_ninja_task.to_string()) - .add_input_src(&tools.patchelf.to_string()); + drv.add_input_src(&tools.cc) + .add_input_src(&tools.coreutils) + .add_input_src(&tools.nix_ninja_task) + .add_input_src(&tools.patchelf); // Add all ninja build inputs. let mut input_set: HashSet = HashSet::new(); for input in &task.inputs { // Declare input for derivation. - add_derived_path(&mut drv, input); + drv.add_derived_path(&input.derived_path); // Encode input for nix-ninja-task. let encoded = &input.to_encoded(); @@ -440,42 +467,41 @@ fn build_task_derivation(tools: Tools, task: Task) -> Result> { let mut discovered_inputs: Vec = Vec::new(); if let Some(deps) = &task.deps { if deps == "gcc" { - let mut file_set: HashSet = HashSet::new(); - // Only explict inputs are processed by gcc. 
- for input in &task.inputs { - let source = match input.path { - SingleDerivedPath::Opaque(_) => input.build_path.clone(), - SingleDerivedPath::Built(_) => { - continue; - } - }; - file_set.insert(source); - } + // Only opaque inputs are processed by gcc + let files: Vec = task + .inputs + .iter() + .filter_map(|input| match input.derived_path { + SingleDerivedPath::Opaque(_) => Some(input.build_path.clone()), + SingleDerivedPath::Built(_) => None, // Will be filled in by dynamic task derivation + }) + .collect(); + + let (discovered_deps, discovered_store_paths) = discover_c_includes( + &tools.nix_tool, + &task.store_dir, + &task.build_dir, + cmdline, + files, + None, + )?; - let files: Vec = file_set.clone().into_iter().collect(); - let c_includes = c_include_parser::retrieve_c_includes(cmdline, files)?; + // Add discovered store paths as input sources only + for store_path in discovered_store_paths { + drv.add_input_src(&store_path); + } - for include in c_includes { - if let Ok(relative) = include.strip_prefix(&task.store_dir) { - if let Some(hash_path) = relative.components().next().map(|c| c.as_os_str()) { - let store_path = task.store_dir.join(hash_path); - drv.add_input_src(&store_path.to_string_lossy()); - continue; - } - } + // Add discovered deps to NIX_NINJA_INPUTS and derivation + for derived_file in discovered_deps { + let encoded = derived_file.to_encoded(); - let derived_file = new_opaque_file(&tools.nix, &task.build_dir, include)?; - // Skip paths that are already in the task inputs. - if file_set.contains(&derived_file.build_path) { + // Skip if already in input_set + if input_set.contains(&encoded) { continue; } - let encoded = &derived_file.to_encoded(); - // Should be source-linked. - input_set.insert(encoded.clone()); - // Should be included as an input to derivation. - add_derived_path(&mut drv, &derived_file); - // Should be returned back to the Runner as a discovered input. 
+ input_set.insert(encoded); + drv.add_derived_path(&derived_file.derived_path); discovered_inputs.push(derived_file); } } @@ -485,7 +511,7 @@ fn build_task_derivation(tools: Tools, task: Task) -> Result> { let mut inputs: Vec = input_set.into_iter().collect(); inputs.sort(); - drv.add_env("NIX_NINJA_INPUTS", &inputs.join(" ")); + drv.set_env("NIX_NINJA_INPUTS", &inputs.join(" ")); // Add all ninja build outputs. let mut outputs: Vec = Vec::new(); @@ -504,7 +530,7 @@ fn build_task_derivation(tools: Tools, task: Task) -> Result> { ); outputs.push(encoded); } - drv.add_env("NIX_NINJA_OUTPUTS", &outputs.join(" ")); + drv.set_env("NIX_NINJA_OUTPUTS", &outputs.join(" ")); { // Prepare $PATH to have coreutils. @@ -522,41 +548,161 @@ fn build_task_derivation(tools: Tools, task: Task) -> Result> { // TODO: If you don't find it it's ok, e.g. ./generated_binary let cmdline_path = which_store_path(cmdline_binary)?; - drv.add_input_src(&cmdline_path.to_string()); + drv.add_input_src(&cmdline_path); path.push(format!("{cmdline_path}/bin")); - drv.add_env("PATH", &path.join(":")); - } - - // The cmdline may refer to hardcoded store paths as they were found - // by the build.ninja generator (e.g. meson). We need to extract them - // and add as inputSrcs. - let found_store_paths = extract_store_paths(&task.store_regex, cmdline)?; - for store_path in found_store_paths { - drv.add_input_src(&store_path.to_string()); + drv.set_env("PATH", &path.join(":")); } + // For debugging purposes: // let json = &drv.to_json_pretty()?; - // println!("Derivation:\n{}", json); - - // Add the derivation to the Nix store. - let drv_path = tools.nix.derivation_add(&drv)?; - - // Collect all the built outputs of the derivation so it can be referenced - // as inputs by dependent builds. 
- let mut drv_outputs: Vec = Vec::new(); - for fid in task.outs() { - let file = &task.files[fid]; - let built_file = new_built_file(&drv_path, file.name.clone().into()); - drv_outputs.push(built_file); + // println!("Derivation:\n{json}"); + + Ok(drv) +} + +// For dynamic tasks, we generate an intermediary derivation that will then +// generate the final derivation with any discovered dependencies from its +// dependencies. +// +// For example, if a task derivation depends on generated.cc, we also want +// to depend on any headers generated.cc includes but we don't know that +// without the derivation that built generated.cc also scanned for includes +// and wrote that to its $deps output. +fn build_dynamic_task_derivation( + tools: Tools, + input_drv: Derivation, + built_inputs: Vec, +) -> Result { + let mut drv = Derivation::new( + &format!("{}.drv", input_drv.name), + &input_drv.system, + &format!("{}/bin/nix-ninja", tools.nix_ninja), + ); + drv.add_input_src(&tools.nix_ninja) + .add_input_src(&tools.nix); + + // Add built inputs as dependencies so the dynamic task has access to them for scanning + for built_input in &built_inputs { + drv.add_derived_path(&built_input.derived_path); } - // Return both discovered inputs & derivation outputs. - discovered_inputs.extend(drv_outputs); - Ok(discovered_inputs) + // Encode built inputs for NIX_NINJA_INPUTS so dynamic task can process them + let mut inputs: Vec = built_inputs + .iter() + .map(|input| input.to_encoded()) + .collect(); + inputs.sort(); + drv.set_env("NIX_NINJA_INPUTS", &inputs.join(" ")); + + drv.add_ca_output("out", HashAlgorithm::Sha256, OutputHashMode::Text); + drv.set_env( + "out", + &Placeholder::standard_output("out") + .render() + .to_string_lossy(), + ); + + // Add the dynamic-task subtool argument + drv.add_arg("-t").add_arg("dynamic-task"); + + // Propagate sources to dynamic task for it discover inputs. 
+ let src = env::var("src").map_err(|_| anyhow!("Expected $src to be set"))?; + drv.set_env("src", &src); + let src_store_path = StorePath::new(src.clone())?; + drv.add_input_src(&src_store_path); + + // Set up PATH to include nix binary + let path = format!("{}/bin", tools.nix); + drv.set_env("PATH", &path); + + // Requires extra experimental features to add our derivations. + drv.set_env( + "NIX_CONFIG", + "extra-experimental-features = nix-command ca-derivations dynamic-derivations", + ); + + // Require recursive-nix to allow nix commands inside the build + drv.set_env("requiredSystemFeatures", "recursive-nix"); + + // Serialize the derivation to a temporary file and add to nix store + let drv_json = input_drv.to_json()?; + let temp_file = std::env::temp_dir().join(format!("drv-{}.json", input_drv.name)); + fs::write(&temp_file, &drv_json)?; + let drv_json_path = tools.nix_tool.store_add(&temp_file)?; + + // Add derivation.json as input dependency and argument + drv.add_input_src(&drv_json_path); + drv.add_arg(&drv_json_path.to_string()); + + Ok(drv) } -fn process_phony(_: Tools, _: Task) -> Result> { - Err(anyhow!("Unimplemented")) +/// Handles the result of build_task_derivation, deciding whether to wrap with +/// a dynamic task derivation or use the derivation directly. +fn handle_derivation_result( + tools: Tools, + task: Task, + mut drv: Derivation, + config: &RunnerConfig, + nix_build_lock: Arc>, +) -> Result { + // Collect built inputs when deps == "gcc" for dynamic dependency discovery + let built_inputs: Vec = if task.deps.as_ref() == Some(&"gcc".to_string()) { + task.inputs + .iter() + .filter(|input| matches!(input.derived_path, SingleDerivedPath::Built(_))) + .cloned() + .collect() + } else { + Vec::new() + }; + + if !built_inputs.is_empty() { + // If we're in Nix sandbox, create a dynamic derivation to handle + // dynamic dependencies. 
+ if config.is_output_derivation { + let dynamic_drv = build_dynamic_task_derivation(tools.clone(), drv, built_inputs)?; + let dynamic_drv_path = tools.nix_tool.derivation_add(&dynamic_drv)?; + Ok(SingleDerivedPath::Built(SingleDerivedPathBuilt::new( + dynamic_drv_path, + "out".to_string(), + ))) + } else { + // Otherwise, symlink these built_inputs into build_dir and do + // dependency discovery locally. + + let built_paths = { + // Serialize nix build calls to prevent log output interleaving + // when multiple tasks with dynamic dependencies run concurrently + // + // TODO: This isn't ideal, perhaps we buffer the logs or emit + // JSON events for log aggregation. + let _lock = nix_build_lock.lock().unwrap(); + local::build_derived_files(&tools.nix_tool, &built_inputs)? + }; + + let (discovered_deps, discovered_store_paths) = + dynamic_task::discover_dynamic_dependencies( + &tools.nix_tool, + &config.store_dir, + &config.build_dir, + &drv, + built_paths, + )?; + + dynamic_task::update_derivation_with_discoveries( + &mut drv, + discovered_deps, + discovered_store_paths, + )?; + + let drv_path = tools.nix_tool.derivation_add(&drv)?; + Ok(SingleDerivedPath::Opaque(drv_path)) + } + } else { + let drv_path = tools.nix_tool.derivation_add(&drv)?; + Ok(SingleDerivedPath::Opaque(drv_path)) + } } pub fn which_store_path(binary_name: &str) -> Result { @@ -601,40 +747,83 @@ fn new_opaque_file( let canonical_path = fs::canonicalize(&path)?; let store_path = nix.store_add(&canonical_path)?; Ok(DerivedFile { - path: SingleDerivedPath::Opaque(store_path.clone()), + derived_path: SingleDerivedPath::Opaque(store_path.clone()), build_path: relative_path, rel_path: None, // None for opaque files - store path points directly to file }) } -fn new_built_file(drv_path: &StorePath, path: PathBuf) -> DerivedFile { - let derived_built = SingleDerivedPathBuilt { - drv_path: drv_path.clone(), - output: normalize_output(&path.to_string_lossy()), - }; +fn new_built_file(derived_path: 
SingleDerivedPath, build_path: PathBuf) -> DerivedFile { + let output_name = normalize_output(&build_path.to_string_lossy()); + let derived_built = SingleDerivedPathBuilt::from_derived_path(derived_path, output_name); DerivedFile { - path: SingleDerivedPath::Built(derived_built), - build_path: path.clone(), - rel_path: Some(path), // For built files, rel_path same as build_path + derived_path: SingleDerivedPath::Built(derived_built), + build_path: build_path.clone(), + rel_path: Some(build_path), // For built files, rel_path same as build_path } } -fn add_derived_path(drv: &mut Derivation, derived_file: &DerivedFile) { - match &derived_file.path { - SingleDerivedPath::Opaque(store_path) => { - drv.add_input_src(&store_path.to_string()); +// Derivation outputs cannot have `/` in them as its suffixed to the derivation +// store path. +fn normalize_output(output: &str) -> String { + output.replace('/', "-") +} + +/// Discovers C include dependencies from a command line and input files. +/// Returns (discovered_deps, discovered_store_paths) where: +/// - discovered_deps: DerivedFiles that need to be encoded and added to NIX_NINJA_INPUTS +/// - discovered_store_paths: Store paths that only need to be added as input sources +pub fn discover_c_includes( + nix_tool: &NixTool, + store_dir: &Path, + build_dir: &Path, + cmdline: &str, + files: Vec, + virtual_paths: Option>, +) -> Result<(Vec, Vec)> { + let c_includes = c_include_parser::retrieve_c_includes(cmdline, files.clone(), virtual_paths)?; + let mut discovered_deps = Vec::new(); + let mut discovered_store_paths = Vec::new(); + + // Convert input files to a set for filtering + let input_files: HashSet = files.into_iter().collect(); + + for include in c_includes { + // Skip input files - we only want to discover new dependencies + if input_files.contains(&include) { + continue; } - SingleDerivedPath::Built(derived_built) => { - drv.add_input_drv( - &derived_built.drv_path.to_string(), - 
vec![derived_built.output.clone()], - ); + + // Check if include is from Nix store or a regular file + if let Ok(relative) = include.strip_prefix(store_dir) { + if let Some(hash_path) = relative.components().next().map(|c| c.as_os_str()) { + let store_path = StorePath::new(store_dir.join(hash_path))?; + discovered_store_paths.push(store_path); + continue; + } } + + // Regular file, add to nix store and treat as derived dependency + let derived_file = new_opaque_file(nix_tool, build_dir, include)?; + discovered_deps.push(derived_file); } + + Ok((discovered_deps, discovered_store_paths)) } -// Derivation outputs cannot have `/` in them as its suffixed to the derivation -// store path. -fn normalize_output(output: &str) -> String { - output.replace('/', "-") +/// Removes -frandom-seed flag from a string of CFLAGS. +fn remove_frandom_seed(flags: &str) -> String { + flags + .split_whitespace() + .filter(|flag| !flag.starts_with("-frandom-seed=")) + .collect::>() + .join(" ") +} + +/// Generates -frandom-seed based on the task's cmdline. 
+fn generate_frandom_seed(cmdline: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(cmdline.as_bytes()); + let result = hasher.finalize(); + format!("{result:x}")[..16].to_string() } diff --git a/crates/nix-tool/src/lib.rs b/crates/nix-tool/src/lib.rs index d919aff..85a8375 100644 --- a/crates/nix-tool/src/lib.rs +++ b/crates/nix-tool/src/lib.rs @@ -5,6 +5,7 @@ use nix_libstore::store_path::StorePath; use std::ffi::OsStr; use std::io::Write; use std::process::{Command, Output}; +use std::str; /// Configuration for Nix store operations #[derive(Debug, Clone)] @@ -35,11 +36,12 @@ impl NixTool { NixTool { config } } - pub fn build(&self, derived_path: &SingleDerivedPath) -> Result { - let installable = &derived_path.to_string(); + pub fn build(&self, derived_paths: &[SingleDerivedPath]) -> Result> { + let installables: Vec = derived_paths.iter().map(|p| p.to_string()).collect(); let output = Command::new(&self.config.nix_tool) .args(&self.config.extra_args) - .args(["build", "-L", "--no-link", "--print-out-paths", installable]) + .args(["build", "-L", "--no-link", "--print-out-paths"]) + .args(&installables) .stderr(std::process::Stdio::inherit()) .output()?; @@ -48,7 +50,13 @@ impl NixTool { return Err(anyhow!("Failed to build:\n{}", stderr)); } - Ok(output) + let stdout = str::from_utf8(&output.stdout)?; + let store_paths: Vec = stdout + .lines() + .map(|line| StorePath::new(line.trim())) + .collect::, _>>()?; + + Ok(store_paths) } /// Add a file to the Nix store diff --git a/docs/design.md b/docs/design.md index 4cc2a60..ec1870f 100644 --- a/docs/design.md +++ b/docs/design.md @@ -129,6 +129,9 @@ persisted betwen runs, but then inside nixpkgs it'll just use `mkMesonPackage`. ### Dependency inference on generated source files +NOTE: This is now done and we have removed `$NIX_NINJA_EXTRA_INPUTS`, please +see [dynamic dependencies] for how this is solved. + In `NixOS/nix`, `bison` is used to generate a `parser-tab.cc` and `parser.tab.hh` file. 
These files include other headers so there's a need to do dependency inference then too. @@ -205,3 +208,4 @@ absolute paths to binaries. [turtle]: https://github.com/raviqqe/turtle-build [n2]: https://github.com/evmar/n2 [dynamic derivations]: ./dynamic-derivations.md +[dynamic dependencies]: ./dynamic-deps.md diff --git a/docs/dynamic-deps.md b/docs/dynamic-deps.md new file mode 100644 index 0000000..3206e55 --- /dev/null +++ b/docs/dynamic-deps.md @@ -0,0 +1,187 @@ +# Dynamic dependency inference + +> [!IMPORTANT] +> Please pre-read [dynamic derivations] and [design notes] as I'll assume you +> already understand Nix dynamic derivations and the existing nix-ninja +> architecture. + +## The problem + +Many real-world build systems generate source files at build-time that introduce +dependencies which cannot be known until after the generation step completes. +This creates a fundamental challenge: how do you handle dependencies that only +exist after a build step runs? + +For example, consider this typical pattern: + +```build.ninja +# Generate a C source file +build generated.c: CUSTOM_COMMAND generate.sh + COMMAND = bash generate.sh + +# Compile the generated source +build main.p/generated.c.o: CC generated.c + ARGS = gcc -c generated.c +``` + +When `generate.sh` runs and produces `generated.c`, the file might contain: + +```c +#include +#include "config.h" + +const char* get_example_name() { + return EXAMPLE_NAME; +} +``` + +The dependency on `config.h` cannot be discovered until after `generated.c` is +created. When one generates a derivation for `main.p/generated.c.o` that lacks +the `config.h` input, the compilation will fail with "config.h: No such file or +directory". 
+ +### Prior art + +Previously, we had a temporary workaround with `$NIX_NINJA_EXTRA_INPUTS`: + +```nix +nixNinjaExtraInputs = [ + "main.p/generated.c.o:config.h" +]; +``` + +This let us avoid the problem while more fundamental pieces were taking shape, +but needed to be removed entirely when we had proper handling of dynamic +dependencies. + +## Architecture + +nix-ninja solves this problem through **dynamic task derivations** - a two-phase +approach that handles dependency discovery for tasks with `deps = gcc`. The +implementation is conditional and operates differently depending on whether +nix-ninja is running inside a Nix derivation (via `mkMesonPackage`) or +interactively outside Nix. + +### Two execution modes + +**Derivation mode (`is_output_derivation = true`):** +- Tasks with built inputs and `deps = gcc` generate a wrapper derivation + known as a **dynamic task derivation**. +- They are passed a mostly complete derivation in JSON form and built inputs + that require dependency discovery. +- In the dynamic task derivation, `nix-ninja -t dynamic-task` (a subtool) is + executed to perform dependency discovery. +- Update task derivation to include all discovered dependencies. + +**Interactive mode (`is_output_derivation = false`):** +- Tasks with built inputs and `deps = gcc` trigger local dynamic dependency + discovery. +- `nix-ninja` builds these inputs locally, symlinks them to the build + directory, and discovers dependencies. + - This is different from derivation mode because we have no access to a + `$src` attribute. Just like how `nix-ninja` in interactive mode has direct + access to source files, so should dynamic dependency discovery. +- Update task derivation to include all discovered dependencies. + +### Key insight: when dynamic dependencies are needed + +Dynamic dependency handling is triggered when **both** conditions are met: +1. The task has `deps = "gcc"` (indicating it needs C include scanning) +2. 
The task depends on outputs from built derivations (`SingleDerivedPath::Built`) + +Static tasks with only opaque file inputs (`SingleDerivedPath::Opaque`) can perform +dependency discovery directly during derivation generation, while dynamic tasks +need the two-phase approach because their inputs are not available until build time. + +## Implementation details + +### Core functions: `build_task_derivation` and `handle_derivation_result` + +The implementation centers around two key functions in `crates/nix-ninja/src/task.rs`: + +1. **`build_task_derivation`** - Generates the base task derivation with static + dependencies and discovered dependencies for opaque inputs. +2. **`handle_derivation_result`** - Decides whether to use the derivation + directly or wrap it with a dynamic task derivation. + +## Example walkthrough + +Let's trace through how nix-ninja handles a build graph with dynamic +dependencies: + +```build.ninja +# Generate source file at build-time +build generated.c: GENERATE + command = echo '#include "config.h"' > generated.c + +# Compile generated source (this will need config.h) +build main.p/generated.c.o: CC generated.c + deps = gcc + +# Link final executable +build main: LINK main.c main.p/generated.c.o +``` + +### Derivation mode execution (`is_output_derivation = true`) + +**Step 1:** nix-ninja generates task derivations + +``` +generated.c.drv: + command: nix-ninja-task "echo '#include \"config.h\"' > generated.c" + inputs: [] + outputs: + generated.c: generated.c + +generated.c.o.drv.drv: (dynamic task derivation) + command: nix-ninja -t dynamic-task /nix/store/generated.c.o.drv.json + inputs: + generated.c.drv^generated.c (built input to scan) + /nix/store/generated.c.o.drv.json + outputs: + out: generated.c.o.drv + +main.drv: (static task - no gcc deps) + command: nix-ninja-task "gcc -o main main.c main.p/generated.c.o" + inputs: + main.c + generated.c.o.drv.drv^out^main.p-generated.c.o + outputs: + main: main +``` + +**Step 2:** Nix builds 
the derivation graph + +1. `generated.c.drv` builds → produces `generated.c` with `#include "config.h"` +2. `generated.c.o.drv.drv` builds: + - Scans `generated.c` → discovers dependency on `config.h` + - Updates `generated.c.o.drv` JSON with `config.h` dependency + - Outputs final `generated.c.o.drv` derivation +3. `generated.c.o.drv` builds → compiles `generated.c.o` +4. `main.drv` builds → links final executable + +### Interactive mode execution (`is_output_derivation = false`) + +**Step 1:** nix-ninja generates task derivations but handles discovery locally + +``` +generated.c.drv: (same as derivation mode) + +generated.c.o.drv.json (same as derivation mode) +``` + +**Step 2:** Local dependency discovery + +1. nix-ninja builds `generated.c.drv` and symlinks result to local build directory +2. Scans `generated.c` → discovers dependency on `config.h` +3. Updates `generated.c.o.drv` JSON with `config.h` dependency +4. Resume building task derivation out of modified JSON + +### Key differences + +- **Derivation mode** uses two-phase approach with intermediate dynamic task derivations +- **Interactive mode** performs discovery locally and updates derivations before submission +- Both modes use the same dependency discovery logic + +[dynamic derivations]: ./dynamic-derivations.md +[design notes]: ./design.md diff --git a/docs/todo.md b/docs/todo.md index 3084192..f58807c 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -1,6 +1,6 @@ # TODO -- [ ] Patchelf support +- [x] Patchelf support - Either use patchelf as subprocess or a crate like `object` to look at all linked objects, find the ones not in /nix/store and patch them to their canonical path. @@ -8,7 +8,7 @@ against symlinks even though the .so files are in /nix/store - Unwise to try to detect linking rule and modify inputs at the ninja build graph layer. 
-- [ ] Move away from LazyBuild n2 branch +- [x] Move away from LazyBuild n2 branch - Did a bunch of work to make n2 lazy evaluate the graph to allow me to modify $in and $out the rule evaluates against. E.g. change $in and $out to refer to /nix/store paths. @@ -32,11 +32,11 @@ - [ ] Writing derivation caching for local mode - Need to adopt something similar to n2's db or ninja's deps cache - Scheduler needs to do mtime dirtying to write derivations -- [ ] Dynamic derivation to infer dependencies +- [x] Dynamic derivation to infer dependencies - Support dep_infer for SingleDerivedPathBuilt, i.e. .cc files that were generated by another derivation. - [ ] Make `nix store add` async, probably biggest perf bottleneck -- [ ] Add github actions CI +- [x] Add github actions CI - [ ] Add benchmarks for generating derivations - [ ] Add benchmarks for end-to-end compilation of NixOS/Nix, for perf work upstream. diff --git a/modules/flake/examples/dynamic-deps/config.h b/modules/flake/examples/dynamic-deps/config.h new file mode 100644 index 0000000..1426c77 --- /dev/null +++ b/modules/flake/examples/dynamic-deps/config.h @@ -0,0 +1,6 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#define EXAMPLE_NAME "dynamic-deps example" + +#endif \ No newline at end of file diff --git a/modules/flake/examples/dynamic-deps/generate.sh b/modules/flake/examples/dynamic-deps/generate.sh new file mode 100755 index 0000000..a2203da --- /dev/null +++ b/modules/flake/examples/dynamic-deps/generate.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +# Script to generate a C++ source file that depends on config.h +cat > generated.cpp << 'EOF' +#include +#include +#include "config.h" + +const char* get_example_name() { + return EXAMPLE_NAME; +} +EOF + +echo "Generated generated.cpp with dependency on config.h" diff --git a/modules/flake/examples/dynamic-deps/main.cpp b/modules/flake/examples/dynamic-deps/main.cpp new file mode 100644 index 0000000..666e4c5 --- /dev/null +++ 
b/modules/flake/examples/dynamic-deps/main.cpp @@ -0,0 +1,9 @@ +#include + +// Forward declaration for the function in generated.c +const char* get_example_name(); + +int main() { + printf("Hello %s!\n", get_example_name()); + return 0; +} \ No newline at end of file diff --git a/modules/flake/examples/dynamic-deps/meson.build b/modules/flake/examples/dynamic-deps/meson.build new file mode 100644 index 0000000..91e6167 --- /dev/null +++ b/modules/flake/examples/dynamic-deps/meson.build @@ -0,0 +1,21 @@ +project('example-dynamic-deps', 'cpp', + version : '0.1.0') + +# Custom target to generate the C++ file that depends on config.h +generated_cpp = custom_target('generate_cpp', + output : 'generated.cpp', + input : 'generate.sh', + command : [find_program('bash'), '@INPUT@'], + build_by_default : true +) + +sources = [ + 'main.cpp', + generated_cpp, +] + +includes = include_directories('.') + +executable('main', + sources, + include_directories : includes) diff --git a/modules/flake/examples/header/main.cpp b/modules/flake/examples/header/main.cpp index 21f28f2..95f5e63 100644 --- a/modules/flake/examples/header/main.cpp +++ b/modules/flake/examples/header/main.cpp @@ -1,4 +1,5 @@ #include +#include #include "header.h" int main() { diff --git a/modules/flake/examples/nix/default.nix b/modules/flake/examples/nix/default.nix index e73014a..fb48c94 100644 --- a/modules/flake/examples/nix/default.nix +++ b/modules/flake/examples/nix/default.nix @@ -37,14 +37,6 @@ mkMesonPackage { inherit src; target = "src/nix/nix"; - nixNinjaExtraInputs = [ - "src/libexpr/libnixexpr.so.p/meson-generated_.._parser-tab.cc.o:../src/libexpr/parser.y" - "src/libexpr/libnixexpr.so.p/meson-generated_.._lexer-tab.cc.o:../src/libexpr/parser.y" - "src/libexpr/libnixexpr.so.p/meson-generated_.._lexer-tab.cc.o:../src/libexpr/lexer.l" - "src/libexpr/libnixexpr.so.p/eval.cc.o:../src/libexpr/parser.y" - "src/libexpr/libnixexpr.so.p/lexer-helpers.cc.o:../src/libexpr/parser.y" - ]; - nativeBuildInputs = 
[ aws-sdk-cpp bison diff --git a/modules/flake/overlays.nix b/modules/flake/overlays.nix index 873e5e3..9860ee0 100644 --- a/modules/flake/overlays.nix +++ b/modules/flake/overlays.nix @@ -75,6 +75,8 @@ "Cargo.{toml,lock}" "crates/nix-{libstore,ninja-task}/Cargo.toml" "crates/nix-{libstore,ninja-task}/**/*.rs" + "crates/deps-infer/Cargo.toml" + "crates/deps-infer/**/*.rs" ]; }; }); @@ -89,6 +91,7 @@ name = "example-header"; src = ./examples/header; target = "hello"; + nativeBuildInputs = [ self.nlohmann_json ]; }; example-multi-source = self.mkMesonPackage { @@ -103,6 +106,13 @@ target = "main"; }; + example-dynamic-deps= self.mkMesonPackage { + name = "example-dynamic-deps"; + src = ./examples/dynamic-deps; + target = "main"; + nativeBuildInputs = [ self.nlohmann_json self.pkg-config ]; + }; + example-nix = self.callPackage ./examples/nix { src = inputs.nix; }; }; diff --git a/modules/flake/packages.nix b/modules/flake/packages.nix index ebc7c07..f3079e5 100644 --- a/modules/flake/packages.nix +++ b/modules/flake/packages.nix @@ -22,6 +22,7 @@ example-header = pkgs.example-header.target; example-multi-source = pkgs.example-multi-source.target; example-shared-lib = pkgs.example-shared-lib.target; + example-dynamic-deps = pkgs.example-dynamic-deps.target; example-nix = pkgs.example-nix.target; }; diff --git a/modules/flake/pkgs/mkMesonPackage/default.nix b/modules/flake/pkgs/mkMesonPackage/default.nix index 861a753..7f9c5bf 100644 --- a/modules/flake/pkgs/mkMesonPackage/default.nix +++ b/modules/flake/pkgs/mkMesonPackage/default.nix @@ -12,15 +12,12 @@ , src , target , nativeBuildInputs ? [ ] -, nixNinjaExtraInputs ? [ ] , ... 
}@args': let normalizedTarget = builtins.replaceStrings ["/"] ["-"] target; - extraInputs = builtins.concatStringsSep "," nixNinjaExtraInputs; - ninjaDrv = stdenv.mkDerivation (args' // { name = "${name}.drv"; @@ -39,9 +36,7 @@ let export NIX_NINJA_DRV="true" export NINJA="${nix-ninja}/bin/nix-ninja" export NIX_CONFIG="extra-experimental-features = nix-command ca-derivations dynamic-derivations" - '' + (lib.optionalString (builtins.length nixNinjaExtraInputs > 0) '' - export NIX_NINJA_EXTRA_INPUTS="${extraInputs}" - ''); + ''; buildPhase = '' runHook preBuild diff --git a/modules/nixos/default.nix b/modules/nixos/default.nix index b09f1a9..c344537 100644 --- a/modules/nixos/default.nix +++ b/modules/nixos/default.nix @@ -4,5 +4,6 @@ nixosTests.nix-build-header = import ./tests/nix-build-header.nix; nixosTests.nix-build-multi-source = import ./tests/nix-build-multi-source.nix; nixosTests.nix-build-shared-lib = import ./tests/nix-build-shared-lib.nix; + nixosTests.nix-build-dynamic-deps = import ./tests/nix-build-dynamic-deps.nix; }; } diff --git a/modules/nixos/tests/nix-build-dynamic-deps.nix b/modules/nixos/tests/nix-build-dynamic-deps.nix new file mode 100644 index 0000000..ef76271 --- /dev/null +++ b/modules/nixos/tests/nix-build-dynamic-deps.nix @@ -0,0 +1,8 @@ +{ self, pkgs, lib, ... }@args: + +import ./nix-build.nix { + flakeOutput = "example-dynamic-deps"; + inputsFrom = [ pkgs.example-dynamic-deps ]; + cmdline = "main"; + expectedStdout = "Hello dynamic-deps example!"; +} args