diff --git a/Cargo.lock b/Cargo.lock index 8d1f80d2..e17719ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -435,7 +435,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -600,9 +600,9 @@ dependencies = [ [[package]] name = "html5ever" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +checksum = "0ff6858c1f7e2a470c5403091866fa95b36fe0dbac5d771f932c15e5ff1ee501" dependencies = [ "log", "mac", @@ -888,10 +888,11 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -943,9 +944,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "markup5ever" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +checksum = "d581ff8be69d08a2efa23a959d81aa22b739073f749f067348bd4f4ba4b69195" dependencies = [ "log", "phf", @@ -957,9 +958,9 @@ dependencies = [ [[package]] name = "markup5ever_rcdom" -version = "0.3.0" +version = "0.4.0-unofficial" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +checksum = "735d687429167a6b78304c018d57d6d91b6f8be38af495b004a23934720a3f03" dependencies = [ "html5ever", "markup5ever", @@ -1378,9 +1379,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.12" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" +checksum = "989e327e510263980e231de548a33e63d34962d29ae61b467389a1a09627a254" dependencies = [ "async-compression", "base64", @@ -1435,7 +1436,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1454,6 +1455,12 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" +[[package]] +name = "rustversion" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" + [[package]] name = "ryu" version = "1.0.18" @@ -1654,16 +1661,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.18.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567" +checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600" dependencies = [ - "cfg-if", "fastrand", "getrandom 0.3.1", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1877,23 +1883,24 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", + "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -1914,9 +1921,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1924,9 +1931,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -1937,9 +1944,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "web-sys" @@ -1990,32 +2000,31 @@ checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" [[package]] name = "windows-registry" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ "windows-result", "windows-strings", - "windows-targets 0.52.6", + "windows-targets 0.53.0", ] [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +checksum = "06374efe858fab7e4f881500e6e86ec8bc28f9462c47e5a9941a0142ad86b189" dependencies = [ - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -2069,13 +2078,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2088,6 +2113,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2100,6 +2131,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2112,12 +2149,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2130,6 +2179,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2142,6 +2197,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2154,6 +2215,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2166,6 +2233,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "wit-bindgen-rt" version = "0.33.0" @@ -2189,9 +2262,9 @@ checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "xml5ever" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c376f76ed09df711203e20c3ef5ce556f0166fa03d39590016c0fd625437fad" +checksum = "d7b906d34d867d216b2d79fb0e9470aaa7f4948ea86b44c27846efedd596076c" dependencies = [ "log", "mac", diff --git a/Cargo.toml b/Cargo.toml index 16a2068f..978975cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,12 +29,12 @@ chrono = "0.4.40" # Used for formatting output timestamp clap = { version = "4.5.32", features = ["derive"], optional = true } # Used for processing CLI arguments cssparser = "0.34.0" # Used for dealing with CSS encoding_rs = "0.8.35" # Used for parsing and converting document charsets -html5ever = "0.27.0" # Used for all things DOM -markup5ever_rcdom = "0.3.0" # Used for manipulating DOM +html5ever = "0.28.0" # Used for all things DOM +markup5ever_rcdom = "=0.4.0-unofficial" # Used for manipulating DOM percent-encoding = "2.3.1" # Used for encoding URLs sha2 = "0.10.8" # Used for calculating checksums during integrity checks -redb = "2.4.0" # Used for on-disk caching of downloaded assets -tempfile = { version = "3.18.0", optional = true } # Used for on-disk caching of downloaded assets +redb = "2.4.0" # Used for on-disk caching of remote assets +tempfile = { version = "3.19.0", optional = true } # Used for on-disk caching of remote assets url = "2.5.4" # Used for parsing URLs openssl = "0.10.71" # Used for static linking of the OpenSSL library @@ -46,7 +46,7 @@ features = ["std", "perf-dfa", "unicode-perl"] # Used for making network requests [dependencies.reqwest] -version = "0.12.12" +version = "0.12.14" default-features = false features = ["default-tls", "blocking", "gzip", "brotli", "deflate"] diff --git a/src/html.rs b/src/html.rs index d7f265d3..69bdf605 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,12 +1,12 @@ use base64::prelude::*; use chrono::prelude::*; use encoding_rs::Encoding; -use html5ever::interface::QualName; +use html5ever::interface::{Attribute, QualName}; use html5ever::parse_document; use html5ever::serialize::{serialize, SerializeOpts}; use html5ever::tendril::{format_tendril, TendrilSink}; -use html5ever::tree_builder::{Attribute, TreeSink}; -use html5ever::{local_name, namespace_url, ns, LocalName}; +use html5ever::tree_builder::create_element; +use html5ever::{namespace_url, ns, LocalName}; use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; use regex::Regex; use reqwest::blocking::Client; @@ -50,23 +50,22 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { ) .expect("unable to serialize DOM into buffer"); - let mut dom = html_to_dom(&buf, "utf-8".to_string()); - let doc = dom.get_document(); - if let Some(html) = get_child_node_by_name(&doc, "html") { + let dom = html_to_dom(&buf, "utf-8".to_string()); + if let Some(html) = get_child_node_by_name(&dom.document, "html") { if let Some(head) = get_child_node_by_name(&html, "head") { - let favicon_node = dom.create_element( - QualName::new(None, ns!(), local_name!("link")), + let favicon_node = create_element( + &dom, + QualName::new(None, ns!(), LocalName::from("link")), vec![ Attribute { - name: QualName::new(None, ns!(), local_name!("rel")), + name: QualName::new(None, ns!(), LocalName::from("rel")), value: format_tendril!("icon"), }, Attribute { - name: QualName::new(None, ns!(), local_name!("href")), + name: QualName::new(None, ns!(), LocalName::from("href")), value: format_tendril!("{}", favicon_data_url), }, ], - Default::default(), ); // Insert favicon LINK tag into HEAD head.children.borrow_mut().push(favicon_node.clone()); @@ -461,21 +460,20 @@ pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom { ) .expect("unable to serialize DOM into buffer"); - let mut dom = html_to_dom(&buf, "utf-8".to_string()); - let doc = dom.get_document(); - if let Some(html_node) = get_child_node_by_name(&doc, "html") { + let dom = html_to_dom(&buf, "utf-8".to_string()); + if let Some(html_node) = get_child_node_by_name(&dom.document, "html") { if let Some(head_node) = get_child_node_by_name(&html_node, "head") { // Check if BASE node already exists in the DOM tree if let Some(base_node) = get_child_node_by_name(&head_node, "base") { set_node_attr(&base_node, "href", Some(desired_base_href)); } else { - let base_node = dom.create_element( - QualName::new(None, ns!(), local_name!("base")), + let base_node = create_element( + &dom, + QualName::new(None, ns!(), LocalName::from("base")), vec![Attribute { - name: QualName::new(None, ns!(), local_name!("href")), + name: QualName::new(None, ns!(), LocalName::from("href")), value: format_tendril!("{}", desired_base_href), }], - Default::default(), ); // Insert newly created BASE node into HEAD @@ -487,7 +485,7 @@ pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom { dom } -pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom { +pub fn set_charset(dom: RcDom, desired_charset: String) -> RcDom { if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(&dom.document) { if get_node_attr(&meta_charset_node, "charset").is_some() { set_node_attr(&meta_charset_node, "charset", Some(desired_charset)); @@ -499,13 +497,13 @@ pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom { ); } } else { - let meta_charset_node = dom.create_element( - QualName::new(None, ns!(), local_name!("meta")), + let meta_charset_node: Handle = create_element( + &dom, + QualName::new(None, ns!(), LocalName::from("meta")), vec![Attribute { - name: QualName::new(None, ns!(), local_name!("charset")), + name: QualName::new(None, ns!(), LocalName::from("charset")), value: format_tendril!("{}", desired_charset), }], - Default::default(), ); // Insert newly created META charset node into HEAD @@ -559,9 +557,8 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option) }; } -pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec { +pub fn serialize_document(dom: RcDom, document_encoding: String, options: &Options) -> Vec { let mut buf: Vec = Vec::new(); - let document = dom.get_document(); if options.isolate || options.no_css @@ -571,21 +568,21 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O || options.no_images { // Take care of CSP - if let Some(html) = get_child_node_by_name(&document, "html") { + if let Some(html) = get_child_node_by_name(&dom.document, "html") { if let Some(head) = get_child_node_by_name(&html, "head") { - let meta = dom.create_element( - QualName::new(None, ns!(), local_name!("meta")), + let meta = create_element( + &dom, + QualName::new(None, ns!(), LocalName::from("meta")), vec![ Attribute { - name: QualName::new(None, ns!(), local_name!("http-equiv")), + name: QualName::new(None, ns!(), LocalName::from("http-equiv")), value: format_tendril!("Content-Security-Policy"), }, Attribute { - name: QualName::new(None, ns!(), local_name!("content")), + name: QualName::new(None, ns!(), LocalName::from("content")), value: format_tendril!("{}", compose_csp(options)), }, ], - Default::default(), ); // The CSP meta-tag has to be prepended, never appended, // since there already may be one defined in the original document, @@ -597,12 +594,9 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O } } - serialize( - &mut buf, - &SerializableHandle::from(document.clone()), - SerializeOpts::default(), - ) - .expect("Unable to serialize DOM into buffer"); + let serializable: SerializableHandle = dom.document.into(); + serialize(&mut buf, &serializable, SerializeOpts::default()) + .expect("Unable to serialize DOM into buffer"); // Unwrap NOSCRIPT elements if options.unwrap_noscript { @@ -634,7 +628,7 @@ pub fn retrieve_and_embed_asset( let resolved_url: Url = resolve_url(document_url, attr_value); match retrieve_asset(cache, client, &document_url.clone(), &resolved_url, options) { - Ok((data, final_url, mut media_type, charset)) => { + Ok((data, final_url, media_type, charset)) => { let node_name: &str = get_node_name(node).unwrap(); // Check integrity if it's a LINK or SCRIPT element @@ -677,12 +671,8 @@ pub fn retrieve_and_embed_asset( walk_and_embed_assets(cache, client, &final_url, &frame_dom.document, options); let mut frame_data: Vec = Vec::new(); - serialize( - &mut frame_data, - &SerializableHandle::from(frame_dom.document.clone()), - SerializeOpts::default(), - ) - .unwrap(); + let serializable: SerializableHandle = frame_dom.document.into(); + serialize(&mut frame_data, &serializable, SerializeOpts::default()).unwrap(); // Create and embed data URL let mut frame_data_url = @@ -694,18 +684,31 @@ pub fn retrieve_and_embed_asset( // Parse media type for SCRIPT elements if node_name == "script" && get_node_attr(node, "src").is_some() { - if let Some(script_node_type_attr_value) = get_node_attr(node, "type") { - media_type = script_node_type_attr_value.to_string(); + let script_media_type = + get_node_attr(node, "type").unwrap_or(String::from("text/javascript")); + + if script_media_type == "text/javascript" { + // TODO: embed content here instead of using data URLs + + // Create and embed data URL + let mut data_url = + create_data_url(&script_media_type, &charset, &data, &final_url); + data_url.set_fragment(resolved_url.fragment()); + set_node_attr(node, attr_name, Some(data_url.to_string())); } else { - // Fallback to default one if it's not specified - media_type = "application/javascript".to_string(); + // Create and embed data URL + let mut data_url = + create_data_url(&script_media_type, &charset, &data, &final_url); + data_url.set_fragment(resolved_url.fragment()); + set_node_attr(node, attr_name, Some(data_url.to_string())); } + } else { + // Create and embed data URL + let mut data_url = + create_data_url(&media_type, &charset, &data, &final_url); + data_url.set_fragment(resolved_url.fragment()); + set_node_attr(node, attr_name, Some(data_url.to_string())); } - - // Create and embed data URL - let mut data_url = create_data_url(&media_type, &charset, &data, &final_url); - data_url.set_fragment(resolved_url.fragment()); - set_node_attr(node, attr_name, Some(data_url.to_string())); } } } @@ -1035,25 +1038,25 @@ pub fn walk_and_embed_assets( } "script" => { // Read values of integrity and src attributes - let script_attr_src: Option = get_node_attr(node, "src"); + let script_attr_src: &str = &get_node_attr(node, "src").unwrap_or_default(); if options.no_js { // Empty inner content node.children.borrow_mut().clear(); // Remove src attribute - if script_attr_src.is_some() { + if !script_attr_src.is_empty() { set_node_attr(node, "src", None); // Wipe integrity attribute set_node_attr(node, "integrity", None); } - } else if !script_attr_src.clone().unwrap_or_default().is_empty() { + } else if !script_attr_src.is_empty() { retrieve_and_embed_asset( cache, client, document_url, node, "src", - &script_attr_src.unwrap_or_default(), + script_attr_src, options, ); } @@ -1192,12 +1195,9 @@ pub fn walk_and_embed_assets( { if let Some(body) = get_child_node_by_name(&html, "body") { let mut buf: Vec = Vec::new(); - serialize( - &mut buf, - &SerializableHandle::from(body.clone()), - SerializeOpts::default(), - ) - .expect("Unable to serialize DOM into buffer"); + let serializable: SerializableHandle = body.into(); + serialize(&mut buf, &serializable, SerializeOpts::default()) + .expect("Unable to serialize DOM into buffer"); let result = String::from_utf8_lossy(&buf); noscript_contents.push_slice(&result); } diff --git a/tests/cli/data_url.rs b/tests/cli/data_url.rs index 6c97934f..56cb0be4 100644 --- a/tests/cli/data_url.rs +++ b/tests/cli/data_url.rs @@ -221,7 +221,7 @@ mod failing { // STDOUT should contain HTML without contents of local JS file assert_eq!( String::from_utf8_lossy(&out.stdout), - "\n" + "\n" ); // Exit code should be 0 diff --git a/tests/cli/local_files.rs b/tests/cli/local_files.rs index 5d77b851..6749dd58 100644 --- a/tests/cli/local_files.rs +++ b/tests/cli/local_files.rs @@ -62,7 +62,7 @@ mod passing { \"\"\n \ Tricky href\n \ Remote URL\n \ - \n\n\n\n\ + \n\n\n\n\ \n\ " ); @@ -258,7 +258,7 @@ mod passing { \n \ \n\n\n\n \

This page should have black background and white foreground, but only when served via http: (not via file:)

\n \ - \n \ + \n \ \n\n\n\n\ \n\ "