Skip to content

performance(stdlib): Switch to much faster ua-parser #1317

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 36 additions & 28 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ stdlib = [
"dep:strip-ansi-escapes",
"dep:syslog_loose",
"dep:tokio",
"dep:uaparser",
"dep:ua-parser",
"dep:url",
"dep:utf8-width",
"dep:uuid",
Expand Down Expand Up @@ -192,7 +192,7 @@ syslog_loose = { version = "0.21", optional = true }
termcolor = { version = "1", optional = true }
thiserror = { version = "2", optional = true }
tracing = { version = "0.1", default-features = false }
uaparser = { version = "0.6", default-features = false, optional = true }
ua-parser = { version = "0.2", optional = true }
utf8-width = { version = "0.1", optional = true }
url = { version = "2", optional = true }
snafu = { version = "0.8", optional = true }
Expand Down Expand Up @@ -250,6 +250,8 @@ proptest-derive = { version = "0.5" }

[build-dependencies]
lalrpop = { version = "0.22", default-features = false }
serde_yaml = "0.9.34"
ua-parser = { version = "0.2" }

[[bench]]
name = "kind"
Expand Down
8 changes: 3 additions & 5 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoic
combine,https://github.com/Marwes/combine,MIT,Markus Westerlind <[email protected]>
community-id,https://github.com/traceflight/rs-community-id,MIT OR Apache-2.0,Julian Wang <[email protected]>
concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT,"Stjepan Glavina <[email protected]>, Taiki Endo <[email protected]>, John Nunley <[email protected]>"
convert_case,https://github.com/rutrum/convert-case,MIT,David Purdum <[email protected]>
convert_case,https://github.com/rutrum/convert-case,MIT,rutrum <[email protected]>
core-foundation,https://github.com/servo/core-foundation-rs,MIT OR Apache-2.0,The Servo Project Developers
cpufeatures,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
Expand All @@ -71,7 +70,6 @@ ctr,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Devel
data-encoding,https://github.com/ia0/data-encoding,MIT,Julien Cretin <[email protected]>
dbl,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
deranged,https://github.com/jhpratt/deranged,MIT OR Apache-2.0,Jacob Pratt <[email protected]>
derive_more,https://github.com/JelteF/derive_more,MIT,Jelte Fennema <[email protected]>
digest,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers
dirs-next,https://github.com/xdg-rs/dirs,MIT OR Apache-2.0,The @xdg-rs members
dirs-sys-next,https://github.com/xdg-rs/dirs/tree/master/dirs-sys,MIT OR Apache-2.0,The @xdg-rs members
Expand Down Expand Up @@ -149,6 +147,7 @@ mio,https://github.com/tokio-rs/mio,MIT,"Carl Lerche <[email protected]>, Thomas
moka,https://github.com/moka-rs/moka,MIT OR Apache-2.0,The moka Authors
ndk-context,https://github.com/rust-windowing/android-ndk-rs,MIT OR Apache-2.0,The Rust Windowing contributors
nix,https://github.com/nix-rust/nix,MIT,The nix-rust Project Developers
nohash,https://github.com/tetcoin/nohash,Apache-2.0 OR MIT,Parity Technologies <[email protected]>
nom,https://github.com/Geal/nom,MIT,[email protected]
nu-ansi-term,https://github.com/nushell/nu-ansi-term,MIT,"[email protected], Ryan Scheel (Havvy) <[email protected]>, Josh Triplett <[email protected]>, The Nushell Project Developers"
num-bigint,https://github.com/rust-num/num-bigint,MIT OR Apache-2.0,The Rust Project Developers
Expand Down Expand Up @@ -201,6 +200,7 @@ redox_users,https://gitlab.redox-os.org/redox-os/users,MIT,"Jose Narvaez <goyox8
regex,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <[email protected]>"
regex-automata,https://github.com/BurntSushi/regex-automata,Unlicense OR MIT,Andrew Gallant <[email protected]>
regex-automata,https://github.com/rust-lang/regex/tree/master/regex-automata,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <[email protected]>"
regex-filtered,https://github.com/ua-parser/uap-rust,BSD-3-Clause,The regex-filtered Authors
regex-syntax,https://github.com/rust-lang/regex,MIT OR Apache-2.0,The Rust Project Developers
regex-syntax,https://github.com/rust-lang/regex/tree/master/regex-syntax,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <[email protected]>"
roxmltree,https://github.com/RazrFalcon/roxmltree,MIT OR Apache-2.0,Yevhenii Reizner <[email protected]>
Expand All @@ -215,7 +215,6 @@ scopeguard,https://github.com/bluss/scopeguard,MIT OR Apache-2.0,bluss
seahash,https://gitlab.redox-os.org/redox-os/seahash,MIT,"ticki <[email protected]>, Tom Almeida <[email protected]>"
serde,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <[email protected]>, David Tolnay <[email protected]>"
serde_json,https://github.com/serde-rs/json,MIT OR Apache-2.0,"Erick Tryzelaar <[email protected]>, David Tolnay <[email protected]>"
serde_yaml,https://github.com/dtolnay/serde-yaml,MIT OR Apache-2.0,David Tolnay <[email protected]>
sha-1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
sha1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
sha2,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
Expand Down Expand Up @@ -251,13 +250,12 @@ tracing-core,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@to
tracing-log,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <[email protected]>
tracing-subscriber,https://github.com/tokio-rs/tracing,MIT,"Eliza Weisman <[email protected]>, David Barsky <[email protected]>, Tokio Contributors <[email protected]>"
typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg <[email protected]>, Andre Bogus <[email protected]>"
uaparser,https://github.com/davidarmstronglewis/uap-rs,MIT,Ocean Lewis
ua-parser,https://github.com/ua-parser/uap-rust,Apache-2.0,The ua-parser Authors
ucd-trie,https://github.com/BurntSushi/ucd-generate,MIT OR Apache-2.0,Andrew Gallant <[email protected]>
unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-3.0,David Tolnay <[email protected]>
unicode-segmentation,https://github.com/unicode-rs/unicode-segmentation,MIT OR Apache-2.0,"kwantam <[email protected]>, Manish Goregaokar <[email protected]>"
unicode-width,https://github.com/unicode-rs/unicode-width,MIT OR Apache-2.0,"kwantam <[email protected]>, Manish Goregaokar <[email protected]>"
universal-hash,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers
unsafe-libyaml,https://github.com/dtolnay/unsafe-libyaml,MIT,David Tolnay <[email protected]>
url,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers
utf16_iter,https://github.com/hsivonen/utf16_iter,Apache-2.0 OR MIT,Henri Sivonen <[email protected]>
utf8-width,https://github.com/magiclen/utf8-width,MIT,Magic Len <[email protected]>
Expand Down
76 changes: 76 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
extern crate lalrpop;

use std::{
borrow::Cow,
env,
fmt::Write as fmt_write,
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
};
use ua_parser::device::Flag;

fn main() {
read_grok_patterns();

#[cfg(feature = "stdlib")]
convert_user_agent_regexes();

println!("cargo:rerun-if-changed=src/parser/parser.lalrpop");
lalrpop::Configuration::new()
.always_use_colors()
Expand Down Expand Up @@ -51,3 +56,74 @@ fn read_grok_patterns() {
let dest_path = Path::new(&out_dir).join("patterns.rs");
fs::write(dest_path, output).expect("'patterns.rs' wasn't generated");
}

/// Generates `$OUT_DIR/user_agent_regexes.rs` from `data/user_agent_regexes.yaml`.
///
/// The YAML file is deserialized into a `ua_parser::Regexes` value and then written
/// back out as a Rust expression that constructs an equal value, with one struct
/// literal per OS, user-agent and device parser.
///
/// # Panics
///
/// Panics if the YAML file cannot be read or deserialized, if `OUT_DIR` is not set,
/// or if the generated file cannot be written.
#[cfg(feature = "stdlib")]
fn convert_user_agent_regexes() {
    const SOURCE_PATH: &str = "data/user_agent_regexes.yaml";

    /// Renders `text` as a Rust string literal.
    ///
    /// A raw literal is used with just enough `#` characters (never fewer than one)
    /// that a `"#` sequence inside `text` cannot terminate it early. Raw literals
    /// cannot contain a bare carriage return, so such values fall back to an escaped
    /// regular literal.
    fn raw_literal(text: &str) -> String {
        if text.contains('\r') {
            return format!("{:?}", text);
        }
        // Longest run of `#` that directly follows a `"` anywhere in the text.
        let longest_run = text
            .split('"')
            .skip(1)
            .map(|tail| tail.bytes().take_while(|&b| b == b'#').count())
            .max()
            .unwrap_or(0);
        let hashes = "#".repeat(longest_run + 1);
        format!("r{}\"{}\"{}", hashes, text, hashes)
    }

    /// Opens a struct literal for the parser type at `type_path`.
    fn open_parser(output: &mut String, type_path: &str) {
        // Most values need no `#` at all; silence clippy instead of special-casing them.
        output.push_str("#[allow(clippy::needless_raw_string_hashes)]\n");
        output.push_str(type_path);
        output.push_str(" {\n");
    }

    /// Emits the mandatory `regex` field.
    fn write_regex(output: &mut String, regex: &str) {
        output.push_str(&format!(" regex: {}.into(),\n", raw_literal(regex)));
    }

    /// Emits an optional string field as `Some(<literal>.into())` or `None`.
    fn write_item(output: &mut String, name: &'static str, value: Option<Cow<str>>) {
        match value {
            Some(value) => {
                output.push_str(&format!(" {}: Some({}.into()),\n", name, raw_literal(&value)));
            }
            None => output.push_str(&format!(" {}: None,\n", name)),
        }
    }

    // `main` already emits a `rerun-if-changed` directive, which disables Cargo's
    // default "rerun on any change" behaviour, so this input must be listed explicitly.
    println!("cargo:rerun-if-changed={}", SOURCE_PATH);

    let regexes = fs::read(SOURCE_PATH).expect("Could not read regexes");
    let regexes: ua_parser::Regexes =
        serde_yaml::from_slice(&regexes).expect("Regex file is not valid yaml");

    let mut output = String::new();
    output.push_str("ua_parser::Regexes {\n");

    output.push_str("os_parsers: vec![\n");
    for os in regexes.os_parsers {
        open_parser(&mut output, "ua_parser::os::Parser");
        write_regex(&mut output, &os.regex);
        write_item(&mut output, "os_replacement", os.os_replacement);
        write_item(&mut output, "os_v1_replacement", os.os_v1_replacement);
        write_item(&mut output, "os_v2_replacement", os.os_v2_replacement);
        write_item(&mut output, "os_v3_replacement", os.os_v3_replacement);
        write_item(&mut output, "os_v4_replacement", os.os_v4_replacement);
        output.push_str("},\n");
    }
    output.push_str("],\n");

    output.push_str("user_agent_parsers: vec![\n");
    for ua in regexes.user_agent_parsers {
        open_parser(&mut output, "ua_parser::user_agent::Parser");
        write_regex(&mut output, &ua.regex);
        write_item(&mut output, "family_replacement", ua.family_replacement);
        write_item(&mut output, "v1_replacement", ua.v1_replacement);
        write_item(&mut output, "v2_replacement", ua.v2_replacement);
        write_item(&mut output, "v3_replacement", ua.v3_replacement);
        write_item(&mut output, "v4_replacement", ua.v4_replacement);
        output.push_str("},\n");
    }
    output.push_str("],\n");

    output.push_str("device_parsers: vec![\n");
    for device in regexes.device_parsers {
        open_parser(&mut output, "ua_parser::device::Parser");
        write_regex(&mut output, &device.regex);
        // Deliberately no catch-all arm: a new `Flag` variant should fail the build
        // here rather than be silently dropped from the generated code.
        match device.regex_flag {
            Some(Flag::IgnoreCase) => {
                output.push_str(" regex_flag: Some(ua_parser::device::Flag::IgnoreCase),\n");
            }
            None => output.push_str(" regex_flag: None,\n"),
        }
        write_item(&mut output, "device_replacement", device.device_replacement);
        write_item(&mut output, "brand_replacement", device.brand_replacement);
        write_item(&mut output, "model_replacement", device.model_replacement);
        output.push_str("},\n");
    }
    output.push_str("],\n}\n");

    let out_dir = env::var("OUT_DIR").expect("OUT_DIR isn't defined");
    let dest_path = Path::new(&out_dir).join("user_agent_regexes.rs");
    fs::write(dest_path, output).expect("'user_agent_regexes.rs' wasn't generated");
}
4 changes: 4 additions & 0 deletions changelog.d/1317.enhancement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The `parse_user_agent` method now uses the [ua-parser](https://crates.io/crates/ua-parser) library,
which is much faster than the previously used `uaparser` crate. The method's output remains unchanged.

authors: JakubOnderka
Loading
Loading