Skip to content

Commit e1bb570

Browse files
committed
performance(stdlib): Switch to much faster ua-parser
1 parent 2d5e2df commit e1bb570

5 files changed

Lines changed: 191 additions & 75 deletions

File tree

Cargo.lock

Lines changed: 36 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ stdlib = [
110110
"dep:strip-ansi-escapes",
111111
"dep:syslog_loose",
112112
"dep:tokio",
113-
"dep:uaparser",
113+
"dep:ua-parser",
114114
"dep:url",
115115
"dep:utf8-width",
116116
"dep:uuid",
@@ -192,7 +192,7 @@ syslog_loose = { version = "0.21", optional = true }
192192
termcolor = { version = "1", optional = true }
193193
thiserror = { version = "2", optional = true }
194194
tracing = { version = "0.1", default-features = false }
195-
uaparser = { version = "0.6", default-features = false, optional = true }
195+
ua-parser = { version = "0.2", optional = true }
196196
utf8-width = { version = "0.1", optional = true }
197197
url = { version = "2", optional = true }
198198
snafu = { version = "0.8", optional = true }
@@ -250,6 +250,8 @@ proptest-derive = { version = "0.5" }
250250

251251
[build-dependencies]
252252
lalrpop = { version = "0.22", default-features = false }
253+
serde_yaml = "0.9.34"
254+
ua-parser = { version = "0.2" }
253255

254256
[[bench]]
255257
name = "kind"

LICENSE-3rdparty.csv

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ colorchoice,https://github.com/rust-cli/anstyle,MIT OR Apache-2.0,The colorchoic
5353
combine,https://github.com/Marwes/combine,MIT,Markus Westerlind <marwes91@gmail.com>
5454
community-id,https://github.com/traceflight/rs-community-id,MIT OR Apache-2.0,Julian Wang <traceflight@outlook.com>
5555
concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT,"Stjepan Glavina <stjepang@gmail.com>, Taiki Endo <te316e89@gmail.com>, John Nunley <dev@notgull.net>"
56-
convert_case,https://github.com/rutrum/convert-case,MIT,David Purdum <purdum41@gmail.com>
5756
convert_case,https://github.com/rutrum/convert-case,MIT,rutrum <dave@rutrum.net>
5857
core-foundation,https://github.com/servo/core-foundation-rs,MIT OR Apache-2.0,The Servo Project Developers
5958
cpufeatures,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
@@ -71,7 +70,6 @@ ctr,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Devel
7170
data-encoding,https://github.com/ia0/data-encoding,MIT,Julien Cretin <git@ia0.eu>
7271
dbl,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
7372
deranged,https://github.com/jhpratt/deranged,MIT OR Apache-2.0,Jacob Pratt <jacob@jhpratt.dev>
74-
derive_more,https://github.com/JelteF/derive_more,MIT,Jelte Fennema <github-tech@jeltef.nl>
7573
digest,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers
7674
dirs-next,https://github.com/xdg-rs/dirs,MIT OR Apache-2.0,The @xdg-rs members
7775
dirs-sys-next,https://github.com/xdg-rs/dirs/tree/master/dirs-sys,MIT OR Apache-2.0,The @xdg-rs members
@@ -149,6 +147,7 @@ mio,https://github.com/tokio-rs/mio,MIT,"Carl Lerche <me@carllerche.com>, Thomas
149147
moka,https://github.com/moka-rs/moka,MIT OR Apache-2.0,The moka Authors
150148
ndk-context,https://github.com/rust-windowing/android-ndk-rs,MIT OR Apache-2.0,The Rust Windowing contributors
151149
nix,https://github.com/nix-rust/nix,MIT,The nix-rust Project Developers
150+
nohash,https://github.com/tetcoin/nohash,Apache-2.0 OR MIT,Parity Technologies <admin@parity.io>
152151
nom,https://github.com/Geal/nom,MIT,contact@geoffroycouprie.com
153152
nu-ansi-term,https://github.com/nushell/nu-ansi-term,MIT,"ogham@bsago.me, Ryan Scheel (Havvy) <ryan.havvy@gmail.com>, Josh Triplett <josh@joshtriplett.org>, The Nushell Project Developers"
154153
num-bigint,https://github.com/rust-num/num-bigint,MIT OR Apache-2.0,The Rust Project Developers
@@ -201,6 +200,7 @@ redox_users,https://gitlab.redox-os.org/redox-os/users,MIT,"Jose Narvaez <goyox8
201200
regex,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <jamslam@gmail.com>"
202201
regex-automata,https://github.com/BurntSushi/regex-automata,Unlicense OR MIT,Andrew Gallant <jamslam@gmail.com>
203202
regex-automata,https://github.com/rust-lang/regex/tree/master/regex-automata,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <jamslam@gmail.com>"
203+
regex-filtered,https://github.com/ua-parser/uap-rust,BSD-3-Clause,The regex-filtered Authors
204204
regex-syntax,https://github.com/rust-lang/regex,MIT OR Apache-2.0,The Rust Project Developers
205205
regex-syntax,https://github.com/rust-lang/regex/tree/master/regex-syntax,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant <jamslam@gmail.com>"
206206
roxmltree,https://github.com/RazrFalcon/roxmltree,MIT OR Apache-2.0,Yevhenii Reizner <razrfalcon@gmail.com>
@@ -215,7 +215,6 @@ scopeguard,https://github.com/bluss/scopeguard,MIT OR Apache-2.0,bluss
215215
seahash,https://gitlab.redox-os.org/redox-os/seahash,MIT,"ticki <ticki@users.noreply.github.com>, Tom Almeida <tom@tommoa.me>"
216216
serde,https://github.com/serde-rs/serde,MIT OR Apache-2.0,"Erick Tryzelaar <erick.tryzelaar@gmail.com>, David Tolnay <dtolnay@gmail.com>"
217217
serde_json,https://github.com/serde-rs/json,MIT OR Apache-2.0,"Erick Tryzelaar <erick.tryzelaar@gmail.com>, David Tolnay <dtolnay@gmail.com>"
218-
serde_yaml,https://github.com/dtolnay/serde-yaml,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
219218
sha-1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
220219
sha1,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
221220
sha2,https://github.com/RustCrypto/hashes,MIT OR Apache-2.0,RustCrypto Developers
@@ -251,13 +250,12 @@ tracing-core,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@to
251250
tracing-log,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@tokio.rs>
252251
tracing-subscriber,https://github.com/tokio-rs/tracing,MIT,"Eliza Weisman <eliza@buoyant.io>, David Barsky <me@davidbarsky.com>, Tokio Contributors <team@tokio.rs>"
253252
typenum,https://github.com/paholg/typenum,MIT OR Apache-2.0,"Paho Lurie-Gregg <paho@paholg.com>, Andre Bogus <bogusandre@gmail.com>"
254-
uaparser,https://github.com/davidarmstronglewis/uap-rs,MIT,Ocean Lewis
253+
ua-parser,https://github.com/ua-parser/uap-rust,Apache-2.0,The ua-parser Authors
255254
ucd-trie,https://github.com/BurntSushi/ucd-generate,MIT OR Apache-2.0,Andrew Gallant <jamslam@gmail.com>
256255
unicode-ident,https://github.com/dtolnay/unicode-ident,(MIT OR Apache-2.0) AND Unicode-3.0,David Tolnay <dtolnay@gmail.com>
257256
unicode-segmentation,https://github.com/unicode-rs/unicode-segmentation,MIT OR Apache-2.0,"kwantam <kwantam@gmail.com>, Manish Goregaokar <manishsmail@gmail.com>"
258257
unicode-width,https://github.com/unicode-rs/unicode-width,MIT OR Apache-2.0,"kwantam <kwantam@gmail.com>, Manish Goregaokar <manishsmail@gmail.com>"
259258
universal-hash,https://github.com/RustCrypto/traits,MIT OR Apache-2.0,RustCrypto Developers
260-
unsafe-libyaml,https://github.com/dtolnay/unsafe-libyaml,MIT,David Tolnay <dtolnay@gmail.com>
261259
url,https://github.com/servo/rust-url,MIT OR Apache-2.0,The rust-url developers
262260
utf16_iter,https://github.com/hsivonen/utf16_iter,Apache-2.0 OR MIT,Henri Sivonen <hsivonen@hsivonen.fi>
263261
utf8-width,https://github.com/magiclen/utf8-width,MIT,Magic Len <len@magiclen.org>

build.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
extern crate lalrpop;
22

33
use std::{
4+
borrow::Cow,
45
env,
56
fmt::Write as fmt_write,
67
fs::{self, File},
78
io::{BufRead, BufReader},
89
path::Path,
910
};
11+
use ua_parser::device::Flag;
1012

1113
fn main() {
1214
read_grok_patterns();
1315

16+
#[cfg(feature = "stdlib")]
17+
convert_user_agent_regexes();
18+
1419
println!("cargo:rerun-if-changed=src/parser/parser.lalrpop");
1520
lalrpop::Configuration::new()
1621
.always_use_colors()
@@ -51,3 +56,75 @@ fn read_grok_patterns() {
5156
let dest_path = Path::new(&out_dir).join("patterns.rs");
5257
fs::write(dest_path, output).expect("'patterns.rs' wasn't generated");
5358
}
59+
60+
#[cfg(feature = "stdlib")]
61+
fn convert_user_agent_regexes() {
62+
println!("cargo:rerun-if-changed=data/user_agent_regexes.yaml");
63+
64+
let regexes = fs::read("data/user_agent_regexes.yaml").expect("Could not read regexes");
65+
let regexes: ua_parser::Regexes =
66+
serde_yaml::from_slice(&regexes).expect("Regex file is not valid");
67+
68+
fn replacement(output: &mut Vec<u8>, name: &'static str, value: Option<Cow<str>>) {
69+
let string = if let Some(value) = value {
70+
format!(" {}: Some(r#\"{}\"#.into()),\n", name, value)
71+
} else {
72+
format!(" {}: None,\n", name)
73+
};
74+
output.extend(string.bytes());
75+
}
76+
77+
let mut output = Vec::new();
78+
79+
output.extend(b"ua_parser::Regexes {\n");
80+
81+
output.extend(b"os_parsers: vec![\n");
82+
for os in regexes.os_parsers {
83+
output.extend(b"#[allow(clippy::needless_raw_string_hashes)]\n");
84+
output.extend(b"ua_parser::os::Parser {\n");
85+
output.extend(format!(" regex: r#\"{}\"#.into(),\n", os.regex).bytes());
86+
replacement(&mut output, "os_replacement", os.os_replacement);
87+
replacement(&mut output, "os_v1_replacement", os.os_v1_replacement);
88+
replacement(&mut output, "os_v2_replacement", os.os_v2_replacement);
89+
replacement(&mut output, "os_v3_replacement", os.os_v3_replacement);
90+
replacement(&mut output, "os_v4_replacement", os.os_v4_replacement);
91+
output.extend(b"},\n");
92+
}
93+
output.extend(b"],\n");
94+
95+
output.extend(b"user_agent_parsers: vec![\n");
96+
for ua in regexes.user_agent_parsers {
97+
output.extend(b"#[allow(clippy::needless_raw_string_hashes)]\n");
98+
output.extend(b"ua_parser::user_agent::Parser {\n");
99+
output.extend(format!(" regex: r#\"{}\"#.into(),\n", ua.regex).bytes());
100+
replacement(&mut output, "family_replacement", ua.family_replacement);
101+
replacement(&mut output, "v1_replacement", ua.v1_replacement);
102+
replacement(&mut output, "v2_replacement", ua.v2_replacement);
103+
replacement(&mut output, "v3_replacement", ua.v3_replacement);
104+
replacement(&mut output, "v4_replacement", ua.v4_replacement);
105+
output.extend(b"},\n");
106+
}
107+
output.extend(b"],\n");
108+
109+
output.extend(b"device_parsers: vec![\n");
110+
for device in regexes.device_parsers {
111+
output.extend(b"#[allow(clippy::needless_raw_string_hashes)]\n");
112+
output.extend(b"ua_parser::device::Parser {\n");
113+
output.extend(format!(" regex: r#\"{}\"#.into(),\n", device.regex).bytes());
114+
output.extend(match device.regex_flag {
115+
Some(Flag::IgnoreCase) => {
116+
b" regex_flag: Some(ua_parser::device::Flag::IgnoreCase),\n".as_ref()
117+
}
118+
None => b" regex_flag: None,\n".as_ref(),
119+
});
120+
replacement(&mut output, "device_replacement", device.device_replacement);
121+
replacement(&mut output, "brand_replacement", device.brand_replacement);
122+
replacement(&mut output, "model_replacement", device.model_replacement);
123+
output.extend(b"},\n");
124+
}
125+
output.extend(b"],\n}\n");
126+
127+
let out_dir = env::var("OUT_DIR").expect("OUT_DIR isn't defined");
128+
let dest_path = Path::new(&out_dir).join("user_agent_regexes.rs");
129+
fs::write(dest_path, output).expect("'user_agent_regexes.rs' wasn't generated");
130+
}

0 commit comments

Comments
 (0)