diff --git a/Cargo.lock b/Cargo.lock index cca1f67c..8d1f80d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,11 +56,54 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" -version = "1.0.7" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys 0.59.0", +] [[package]] name = "assert_cmd" @@ -130,12 +173,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.5.0" @@ -223,28 +260,50 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.25" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +checksum = "6088f3ae8c3608d19260cd7445411865a485688711b78b5be70d78cd96136f83" dependencies = [ - "atty", - "bitflags 1.3.2", + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22a7ef7f676155edfb82daa97f99441f3ebf4a58d5e32f295a56259f1b6facc8" +dependencies = [ + "anstream", + "anstyle", "clap_lex", - "indexmap", "strsim", - "termcolor", - "textwrap", ] [[package]] -name = "clap_lex" -version = "0.2.4" +name = "clap_derive" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ - "os_str_bytes", + "heck", + "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "core-foundation" version = "0.9.4" @@ -525,10 +584,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] -name = "hashbrown" -version = "0.12.3" +name = "heck" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" @@ -809,22 +868,18 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown", -] - [[package]] name = "ipnet" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itoa" version = "1.0.11" @@ -1021,7 +1076,7 @@ version = "0.10.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" dependencies = [ - "bitflags 2.5.0", + "bitflags", "cfg-if", "foreign-types", "libc", @@ -1069,12 +1124,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "os_str_bytes" -version = "6.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" - [[package]] name = "parking_lot" version = "0.12.2" @@ -1295,7 +1344,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" dependencies = [ - "bitflags 2.5.0", + "bitflags", ] [[package]] @@ -1382,7 +1431,7 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dade4812df5c384711475be5fcd8c162555352945401aed22a35bffeab61f657" dependencies = [ - "bitflags 2.5.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -1432,7 +1481,7 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" dependencies = [ - "bitflags 2.5.0", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -1568,9 +1617,9 @@ dependencies = [ [[package]] name = "strsim" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" @@ -1628,27 +1677,12 @@ dependencies = [ "utf-8", ] -[[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" -dependencies = [ - "winapi-util", -] - [[package]] name = "termtree" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" -[[package]] -name = "textwrap" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" - [[package]] name = "tinystr" version = "0.7.6" @@ -1790,6 +1824,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1927,15 +1967,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2005,6 +2036,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2132,7 +2172,7 @@ version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags 2.5.0", + "bitflags", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 50c194e8..16a2068f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ license = "CC0-1.0" atty = "0.2.14" # Used for highlighting network errors base64 = "0.22.1" # Used for integrity attributes chrono = "0.4.40" # Used for formatting output timestamp -clap = { version = "3.2.25", optional = true } # Used for processing CLI arguments +clap = { version = "4.5.32", features = ["derive"], optional = true } # Used for processing CLI arguments cssparser = "0.34.0" # Used for dealing with CSS encoding_rs = "0.8.35" # Used for parsing and converting document charsets html5ever = "0.27.0" # Used for all things DOM diff --git a/src/main.rs b/src/main.rs index 0408baab..15c842f2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,13 +2,116 @@ use std::fs; use std::io::{self, Error as IoError, Write}; use std::process; -use clap::{App, Arg, ArgAction}; +use clap::Parser; use tempfile::Builder; use monolith::cache::Cache; use monolith::cookies::parse_cookie_file_contents; use monolith::core::{create_monolithic_document, print_error_message, Options}; +const ASCII: &str = " \ + _____ ______________ __________ ___________________ ___ +| \\ / \\ | | | | | | +| \\_/ __ \\_| __ | | ___ ___ |__| | +| | | | | | | | | | | | +| |\\ /| |__| _ |__| |____| | | | | __ | +| | \\___/ | | \\ | | | | | | | +|___| |__________| \\_____________________| |___| |___| |___| +"; + +#[derive(Parser)] +#[command(name = env!("CARGO_PKG_NAME"))] +#[command(version)] // Read version from Cargo.toml +#[command(about = ASCII.to_owned() + "\n" + env!("CARGO_PKG_NAME") + " " + env!("CARGO_PKG_VERSION") + "\n\n" + env!("CARGO_PKG_DESCRIPTION"), long_about = None)] +struct Cli { + /// Remove audio sources + #[arg(short = 'a', long)] + no_audio: bool, + + /// Set custom base URL + #[arg(short, long, value_name = "http://localhost/")] + base_url: Option, + + /// Treat specified domains as blacklist + #[arg(short = 'B', long)] + blacklist_domains: bool, + + /// Remove CSS + #[arg(short = 'c', long)] + no_css: bool, + + /// Specify cookie file + #[arg(short = 'C', long, value_name = "cookies.txt")] + cookie_file: Option, + + /// Specify domains to use for white/black-listing + #[arg(short = 'd', long = "domain", value_name = "example.com")] + domains: Vec, + + /// Ignore network errors + #[arg(short = 'e', long)] + ignore_errors: bool, + + /// Enforce custom charset + #[arg(short = 'E', long, value_name = "UTF-8")] + encoding: Option, + + /// Remove frames and iframes + #[arg(short = 'f', long)] + no_frames: bool, + + /// Remove fonts + #[arg(short = 'F', long)] + no_fonts: bool, + + /// Remove images + #[arg(short = 'i', long)] + no_images: bool, + + /// Cut off document from the Internet + #[arg(short = 'I', long)] + isolate: bool, + + /// Remove JavaScript + #[arg(short = 'j', long)] + no_js: bool, + + /// Allow invalid X.509 (TLS) certificates + #[arg(short = 'k', long)] + insecure: bool, + + /// Exclude timestamp and source information + #[arg(short = 'M', long)] + no_metadata: bool, + + /// Replace NOSCRIPT elements with their contents + #[arg(short = 'n', long)] + unwrap_noscript: bool, + + /// File to write to, use - for STDOUT + #[arg(short, long, value_name = "result.html")] + output: Option, + + /// Suppress verbosity + #[arg(short, long)] + quiet: bool, + + /// Adjust network request timeout + #[arg(short, long, value_name = "60")] + timeout: Option, + + /// Set custom User-Agent string + #[arg(short, long, value_name = "Firefox")] + user_agent: Option, + + /// Remove video sources + #[arg(short = 'v', long)] + no_video: bool, + + /// URL or file path, use - for STDIN + target: String, +} + enum Output { Stdout(io::Stdout), File(fs::File), @@ -45,119 +148,48 @@ impl Output { } } -const ASCII: &str = " \ - _____ ______________ __________ ___________________ ___ -| \\ / \\ | | | | | | -| \\_/ __ \\_| __ | | ___ ___ |__| | -| | | | | | | | | | | | -| |\\ /| |__| _ |__| |____| | | | | __ | -| | \\___/ | | \\ | | | | | | | -|___| |__________| \\_____________________| |___| |___| |___| -"; const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 10; // Minimum file size for on-disk caching (in bytes) const DEFAULT_NETWORK_TIMEOUT: u64 = 120; const DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0"; fn main() { - // Process CLI flags and options - let mut cookie_file_path: Option = None; + let cli = Cli::parse(); + let cookie_file_path; let mut exit_code = 0; let mut options: Options = Options::default(); - let source; let destination; + + // Process the command { - let app = App::new(env!("CARGO_PKG_NAME")) - .version(env!("CARGO_PKG_VERSION")) - .author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str()) - .about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str()) - .args_from_usage("-a, --no-audio 'Remove audio sources'") - .args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'") - .args_from_usage( - "-B, --blacklist-domains 'Treat list of specified domains as blacklist'", - ) - .args_from_usage("-c, --no-css 'Remove CSS'") - .args_from_usage("-C, --cookie-file=[cookies.txt] 'Specify cookie file'") - .arg( - Arg::with_name("domains") - .short('d') - .long("domain") - .takes_value(true) - .value_name("example.com") - .action(ArgAction::Append) - .help("Specify domains to use for white/black-listing"), - ) - .args_from_usage("-e, --ignore-errors 'Ignore network errors'") - .args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'") - .args_from_usage("-f, --no-frames 'Remove frames and iframes'") - .args_from_usage("-F, --no-fonts 'Remove fonts'") - .args_from_usage("-i, --no-images 'Remove images'") - .args_from_usage("-I, --isolate 'Cut off document from the Internet'") - .args_from_usage("-j, --no-js 'Remove JavaScript'") - .args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'") - .args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'") - .args_from_usage( - "-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'", - ) - .args_from_usage( - "-o, --output=[document.html] 'Write output to , use - for STDOUT'", - ) - .args_from_usage("-q, --quiet 'Suppress verbosity'") - .args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") - .args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") - .args_from_usage("-v, --no-video 'Remove video sources'") - .arg( - Arg::with_name("target") - .required(true) - .takes_value(true) - .index(1) - .help("URL or file path, use - for STDIN"), - ) - .get_matches(); - - // Process the command - source = app - .value_of("target") - .expect("please set target") - .to_string(); - options.no_audio = app.is_present("no-audio"); - if let Some(base_url) = app.value_of("base-url") { - options.base_url = Some(base_url.to_string()); + options.base_url = cli.base_url; + options.blacklist_domains = cli.blacklist_domains; + options.encoding = cli.encoding; + if !cli.domains.is_empty() { + options.domains = Some(cli.domains); } - options.blacklist_domains = app.is_present("blacklist-domains"); - options.no_css = app.is_present("no-css"); - if let Some(cookie_file) = app.value_of("cookie-file") { - cookie_file_path = Some(cookie_file.to_string()); - } - if let Some(encoding) = app.value_of("encoding") { - options.encoding = Some(encoding.to_string()); - } - if let Some(domains) = app.get_many::("domains") { - let list_of_domains: Vec = domains.cloned().collect::>(); - options.domains = Some(list_of_domains); - } - options.ignore_errors = app.is_present("ignore-errors"); - options.no_frames = app.is_present("no-frames"); - options.no_fonts = app.is_present("no-fonts"); - options.no_images = app.is_present("no-images"); - options.isolate = app.is_present("isolate"); - options.no_js = app.is_present("no-js"); - options.insecure = app.is_present("insecure"); - options.no_metadata = app.is_present("no-metadata"); - destination = app.value_of("output").unwrap_or("").to_string(); - options.silent = app.is_present("quiet"); - options.timeout = app - .value_of("timeout") - .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) - .parse::() - .unwrap(); - if let Some(user_agent) = app.value_of("user-agent") { - options.user_agent = Some(user_agent.to_string()); - } else { + options.ignore_errors = cli.ignore_errors; + options.insecure = cli.insecure; + options.isolate = cli.isolate; + options.no_audio = cli.no_audio; + options.no_css = cli.no_css; + options.no_fonts = cli.no_fonts; + options.no_frames = cli.no_frames; + options.no_images = cli.no_images; + options.no_js = cli.no_js; + options.no_metadata = cli.no_metadata; + options.no_video = cli.no_video; + options.silent = cli.quiet; + options.timeout = cli.timeout.unwrap_or(DEFAULT_NETWORK_TIMEOUT); + options.unwrap_noscript = cli.unwrap_noscript; + if cli.user_agent.is_none() { options.user_agent = Some(DEFAULT_USER_AGENT.to_string()); + } else { + options.user_agent = cli.user_agent; } - options.unwrap_noscript = app.is_present("unwrap-noscript"); - options.no_video = app.is_present("no-video"); + + cookie_file_path = cli.cookie_file; + destination = cli.output.clone(); } // Set up cache (attempt to create temporary file) @@ -212,10 +244,12 @@ fn main() { } } - match create_monolithic_document(source, &options, &mut cache) { + // Retrieve target from source and output result + match create_monolithic_document(cli.target, &options, &mut cache) { Ok(result) => { // Define output - let mut output = Output::new(&destination).expect("could not prepare output"); + let mut output = Output::new(&destination.unwrap_or(String::new())) + .expect("could not prepare output"); // Write result into STDOUT or file output.write(&result).expect("could not write output");