From 58b85a21f7b999e99f28503f055d58804cba45cb Mon Sep 17 00:00:00 2001
From: Yuri Astrakhan
Date: Wed, 9 Apr 2025 23:54:18 -0400
Subject: [PATCH] allow renaming rules for C enums, structs, and typedefs

```rust
// true to enable debug output as warnings
let mut ren = Renamer::new(true);

// rename a single item, e.g. a struct, enum, or a typedef
ren.rename_item("my_struct", "MyStruct");

// rename an enum and its values
rename_enum!(
    ren,
    "my_enum" => "MyEnum", // rename the enum itself
    remove: "^I_SAID_", // optionally any number of "remove" regexes
    remove: "_ENUM$",
    case: Pascal, // optionally set case convert, defaults to "PascalCase"
    "MV_IT" => "Value1", // rename a specific value after pattern removal
    "MV_IT2" => "Value2", // more specific value renames
);

let bindings = Builder::default()
    // in real code, use .header("path/to/header.h")
    .header_contents("test.h", r#"

struct my_struct {
    int a;
};

enum my_enum {
    I_SAID_YES_ENUM,
    I_SAID_NO_ENUM,
    I_SAID_MV_IT_ENUM,
    I_SAID_MV_IT2_ENUM,
};

"#)
    .rustified_enum(ren.get_regex_str())
    .parse_callbacks(Box::new(ren))
    .generate().unwrap();

/////////// generated code

#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct MyStruct {
    pub a: ::std::os::raw::c_int,
}

#[repr(u32)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub enum MyEnum {
    Yes = 0,
    No = 1,
    Value1 = 2,
    Value2 = 3,
}
```
---
 Cargo.lock            |  16 +++
 Cargo.toml            |   1 +
 bindgen/Cargo.toml    |   1 +
 bindgen/ir/context.rs |   6 +-
 bindgen/lib.rs        |  38 ++++++
 bindgen/renamer.rs    | 294 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 353 insertions(+), 3 deletions(-)
 create mode 100644 bindgen/renamer.rs

diff --git a/Cargo.lock b/Cargo.lock
index 4e88c224f5..dd71c67729 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -43,6 +43,7 @@ dependencies = [
  "clang-sys",
  "clap",
  "clap_complete",
+ "convert_case",
  "itertools",
  "log",
  "prettyplease",
@@ -187,6 +188,15 @@ dependencies = [
  "os_str_bytes",
 ]
 
+[[package]]
+name = "convert_case"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f"
+dependencies = [
+ "unicode-segmentation",
+]
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -628,6 +638,12 @@ version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+
 [[package]]
 name = "unicode-width"
 version = "0.1.14"
diff --git a/Cargo.toml b/Cargo.toml
index b0a5bbb082..68e892c4ba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,6 +30,7 @@ cexpr = "0.6"
 clang-sys = "1"
 clap = "4"
 clap_complete = "4"
+convert_case = "0.8.0"
 env_logger = "0.10.0"
 itertools = { version = ">=0.10,<0.14", default-features = false }
 libloading = "0.8"
diff --git a/bindgen/Cargo.toml b/bindgen/Cargo.toml
index c01f8f0c44..2f67a2f0c7 100644
--- a/bindgen/Cargo.toml
+++ b/bindgen/Cargo.toml
@@ -32,6 +32,7 @@ cexpr.workspace = true
 clang-sys = { workspace = true, features = ["clang_11_0"] }
 clap = { workspace = true, features = ["derive"], optional = true }
 clap_complete = { workspace = true, optional = true }
+convert_case.workspace = true
 itertools = { workspace = true }
 log = { workspace = true, optional = true }
 prettyplease = { workspace = true, optional = true, features = ["verbatim"] }
diff --git a/bindgen/ir/context.rs b/bindgen/ir/context.rs
index 3f9e16ac9b..ab457222de 100644
--- a/bindgen/ir/context.rs
+++ b/bindgen/ir/context.rs
@@ -395,15 +395,15 @@ pub(crate) struct BindgenContext {
     /// Whether a bindgen float16 was generated
     generated_bindgen_float16: Cell,
 
-    /// The set of `ItemId`s that are allowlisted. This the very first thing
+    /// The set of [`ItemId`]s that are allowlisted. This is the very first thing
     /// computed after parsing our IR, and before running any of our analyses.
     allowlisted: Option,
 
-    /// Cache for calls to `ParseCallbacks::blocklisted_type_implements_trait`
+    /// Cache for calls to [`crate::callbacks::ParseCallbacks::blocklisted_type_implements_trait`]
     blocklisted_types_implement_traits:
         RefCell>>,
 
-    /// The set of `ItemId`s that are allowlisted for code generation _and_ that
+    /// The set of [`ItemId`]s that are allowlisted for code generation _and_ that
     /// we should generate accounting for the codegen options.
     ///
     /// It's computed right after computing the allowlisted items.
diff --git a/bindgen/lib.rs b/bindgen/lib.rs
index 12ac8a2998..2e8c32d541 100644
--- a/bindgen/lib.rs
+++ b/bindgen/lib.rs
@@ -46,6 +46,7 @@ mod features;
 mod ir;
 mod parse;
 mod regex_set;
+mod renamer;
 
 pub use codegen::{
     AliasVariation, EnumVariation, MacroTypeVariation, NonCopyUnionStyle,
@@ -55,6 +56,7 @@ pub use ir::annotations::FieldVisibilityKind;
 pub use ir::function::Abi;
 #[cfg(feature = "__cli")]
 pub use options::cli::builder_from_flags;
+pub use renamer::{Case, IdentRenamer, Regex, Renamer};
 
 use codegen::CodegenError;
 use features::RustFeatures;
@@ -1312,6 +1314,42 @@ impl callbacks::ParseCallbacks for CargoCallbacks {
     }
 }
 
+/// Macro to help define renaming rules for an enum and its values. See an example in the [`Renamer`] documentation.
+#[macro_export]
+macro_rules! rename_enum {
+    ( $cb:expr,
+      $c_name:literal => $rust_name:literal
+      $(, remove: $remove:literal)*
+      $(, case: $case:ident)?
+      $(, $itm:literal => $ren:literal)*
+      $(,)?
+    ) => {
+        $cb.rename_item($c_name, $rust_name);
+        #[allow(clippy::needless_update)]
+        $cb.rename_enum_val(
+            Some(concat!("^enum ", $c_name, "$")), // anchored: rules for `enum foo` must not also match `enum foo_bar`
+            $crate::IdentRenamer {
+                remove: {
+                    let patterns: Vec<&str> = vec![$($remove),*];
+                    if patterns.is_empty() {
+                        None
+                    } else {
+                        Some(
+                            patterns
+                                .into_iter()
+                                .map(|v| $crate::Regex::new(v).expect("Unable to compile regex for remove parameter"))
+                                .collect()
+                        )
+                    }
+                },
+                $( case: Some($crate::Case::$case), )?
+                renames: vec![$( ($itm.into(), $ren.into()), )*].into_iter().collect(),
+                ..$crate::IdentRenamer::default_case($crate::Case::Pascal)
+            }
+        );
+    };
+}
+
 /// Test `command_line_flag` function.
 #[test]
 fn commandline_flag_unit_test_function() {
diff --git a/bindgen/renamer.rs b/bindgen/renamer.rs
new file mode 100644
index 0000000000..4001d32260
--- /dev/null
+++ b/bindgen/renamer.rs
@@ -0,0 +1,294 @@
+use std::collections::HashMap;
+
+pub use convert_case::Case;
+use convert_case::Casing as _;
+pub use regex::Regex;
+
+use crate::callbacks::{EnumVariantValue, ParseCallbacks};
+
+/// Defines the rules for how a C identifier should be renamed.
+#[derive(Debug, Default)]
+pub struct IdentRenamer {
+    /// Any regexes to remove substrings from the value. Applied in the given order before any explicit renaming.
+    pub remove: Option<Vec<Regex>>,
+    /// Explicit renaming once all matching strings are removed. If a match is found, skips automatic case change.
+    pub renames: HashMap<String, String>,
+    /// Which case to convert the value to, unless explicitly renamed.
+    pub case: Option<Case<'static>>,
+}
+
+impl IdentRenamer {
+    /// Create a new `IdentRenamer` instance, specifying the case to convert to.
+    #[must_use]
+    pub fn default_case(case: Case<'static>) -> Self {
+        Self {
+            case: Some(case),
+            ..Default::default()
+        }
+    }
+
+    fn apply(&self, val: &str) -> String {
+        let mut val = val.to_owned();
+        if let Some(remove) = &self.remove {
+            for re in remove {
+                val = re.replace(&val, "").into(); // `Regex::replace` strips only the first match of each pattern
+            }
+        }
+        if let Some(new_val) = self.renames.get(val.as_str()) {
+            new_val.clone() // an explicit rename wins and skips the case conversion
+        } else if let Some(case) = self.case {
+            val.to_case(case)
+        } else {
+            val
+        }
+    }
+}
+
+/// Renamer is a struct that implements the [`ParseCallbacks`] trait.
+/// It is used to rename C items like enums, structs, and typedefs in the generated Rust bindings.
+///
+/// # Usage
+///
+/// To use `Renamer` from your `build.rs`, you need to create a new instance of it and configure it with the renaming rules.
+/// You can rename items by their name or by using regex patterns. You can also specify how to rename enum values.
+///
+/// ```
+/// // build.rs
+/// use bindgen::{Builder, Renamer, rename_enum};
+///
+/// // true to enable debug output as warnings
+/// let mut ren = Renamer::new(true);
+///
+/// // rename a single item, e.g., a struct, enum, or a typedef
+/// ren.rename_item("my_struct", "MyStruct");
+///
+/// // rename an enum and its values
+/// rename_enum!(
+///     ren,
+///     "my_enum" => "MyEnum", // rename the enum itself
+///     remove: "^I_SAID_", // optionally any number of "remove" regexes
+///     remove: "_ENUM$",
+///     case: Pascal, // optionally set case convert, defaults to "PascalCase"
+///     "MV_IT" => "Value1", // rename a specific value after pattern removal
+///     "MV_IT2" => "Value2", // more specific value renames
+/// );
+///
+/// let bindings = Builder::default()
+///     // in real code, use .header("path/to/header.h")
+///     .header_contents("test.h", r#"
+///
+/// struct my_struct {
+///     int a;
+/// };
+///
+/// enum my_enum {
+///     I_SAID_YES_ENUM,
+///     I_SAID_NO_ENUM,
+///     I_SAID_MV_IT_ENUM,
+///     I_SAID_MV_IT2_ENUM,
+/// };
+///
+/// "#)
+///     // note that generated regex str includes all the renames, not just enums
+///     .rustified_enum(ren.get_regex_str())
+///     .parse_callbacks(Box::new(ren))
+///     .generate().unwrap();
+/// ```
+///
+/// # Generated Code
+///
+/// This is the approximate code that would be generated by the above:
+///
+/// ```rust,ignore
+/// #[repr(C)]
+/// #[derive(Debug, Copy, Clone)]
+/// pub struct MyStruct {
+///     pub a: ::std::os::raw::c_int,
+/// }
+///
+/// #[repr(u32)]
+/// #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
+/// pub enum MyEnum {
+///     Yes = 0,
+///     No = 1,
+///     Value1 = 2,
+///     Value2 = 3,
+/// }
+/// ```
+///
+///
+#[derive(Debug, Default)]
+pub struct Renamer {
+    /// Enable debug output
+    debug: bool,
+    /// Rename C items like enums, structs, and aliases, replacing them with a new name.
+    item_renames: HashMap<String, String>,
+    /// Rename C items like enums, structs, and aliases that match a regex and apply a renamer.
+    /// The regex string must not contain '^' or '$' symbols.
+    item_renames_ext: Vec<(Regex, IdentRenamer)>,
+    /// Matches C enum names (i.e. "enum foo").
+    /// Note that the regex might be None because the callback might also not have it for some enums.
+    enum_renames: Vec<(Option<Regex>, IdentRenamer)>,
+}
+
+impl Renamer {
+    /// Create a new `Renamer` instance, with the option to enable debug output.
+    #[must_use]
+    pub fn new(debug: bool) -> Self {
+        Self {
+            debug,
+            ..Default::default()
+        }
+    }
+
+    /// Get a regex string that matches all configured C items
+    #[must_use]
+    pub fn get_regex_str(&self) -> String {
+        self.item_renames_ext
+            .iter()
+            .map(|(re, _)| re.as_str())
+            .chain(self.item_renames.keys().map(String::as_str))
+            .fold(String::new(), |mut acc, re| {
+                if !acc.is_empty() {
+                    acc.push('|'); // alternation separator between patterns, never leading or trailing
+                }
+                acc.push_str(re);
+                acc
+            })
+    }
+
+    /// Rename a single C item, e.g., a struct, enum, or a typedef.
+    pub fn rename_item(
+        &mut self,
+        c_name: impl AsRef<str>,
+        rust_name: impl AsRef<str>,
+    ) {
+        self.item_renames
+            .insert(c_name.as_ref().into(), rust_name.as_ref().into());
+    }
+
+    /// Rename any C item, including enums and structs.
+    ///
+    /// # Panics
+    /// Will panic if the regex contains '^' or '$' symbols.
+    pub fn rename_many(&mut self, c_name: Regex, renamer: IdentRenamer) {
+        assert!(
+            !c_name.as_str().contains('^'),
+            "Regex must not contain '^' symbol"
+        );
+        assert!(
+            !c_name.as_str().contains('$'),
+            "Regex must not contain '$' symbol"
+        );
+        self.item_renames_ext.push((c_name, renamer));
+    }
+
+    /// Rename enum values. Make sure `enum_c_name` is in the form `enum some_enum_name`; `None` targets enums reported without a name.
+    ///
+    /// # Panics
+    /// Will panic if the `enum_c_name` is not a valid regex.
+    pub fn rename_enum_val(
+        &mut self,
+        enum_c_name: Option<&str>,
+        val_renamer: IdentRenamer,
+    ) {
+        self.enum_renames.push((
+            enum_c_name
+                .map(|v| Regex::new(v).expect("Invalid enum_c_name regex")),
+            val_renamer,
+        ));
+    }
+}
+
+impl ParseCallbacks for Renamer {
+    fn enum_variant_name(
+        &self,
+        enum_name: Option<&str>,
+        value: &str,
+        _variant_value: EnumVariantValue,
+    ) -> Option<String> {
+        self.enum_renames
+            .iter()
+            .filter_map(|(re, rn)| match (enum_name, re) {
+                (Some(enum_name), Some(re)) if re.is_match(enum_name) => Some(rn),
+                (None, None) => Some(rn),
+                _ => None,
+            })
+            .map(|rn| rn.apply(value))
+            .next()
+            .or_else(|| {
+                if self.debug {
+                    let name = enum_name.unwrap_or("<anonymous>"); // nameless enums reach here too; logging must not panic
+                    println!("cargo:warning=Unrecognized enum variant {name} :: {value}");
+                }
+                None
+            })
+    }
+
+    fn item_name(&self, item_name: &str) -> Option<String> {
+        self.item_renames
+            .get(item_name)
+            .cloned()
+            .or_else(|| {
+                self.item_renames_ext
+                    .iter()
+                    .filter_map(|(re, rn)| {
+                        if re.is_match(item_name) {
+                            Some(rn)
+                        } else {
+                            None
+                        }
+                    })
+                    .map(|rn| rn.apply(item_name))
+                    .next()
+            })
+            .or_else(|| {
+                if self.debug {
+                    println!("cargo:warning=Unrecognized item {item_name}");
+                }
+                None
+            })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_get_regex_str() {
+        let mut cb = Renamer::new(false);
+        cb.rename_item("bar", "baz");
+        cb.rename_many(Regex::new(r"foo.*").unwrap(), IdentRenamer::default());
+        cb.rename_many(Regex::new("bas").unwrap(), IdentRenamer::default());
+        assert_eq!(cb.get_regex_str(), "foo.*|bas|bar");
+    }
+}