diff --git a/Cargo.lock b/Cargo.lock index 713c258a..0346539d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -191,9 +191,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" -version = "1.6.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cc" @@ -209,16 +209,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -328,6 +328,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -351,18 +362,18 @@ dependencies = [ [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", "windows-sys 0.52.0", @@ -492,7 +503,19 @@ checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", ] [[package]] @@ -607,9 +630,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.7" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", @@ -620,7 +643,6 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", "tower-service", "tracing", ] @@ -648,14 +670,143 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "icu_normalizer", + "icu_properties", ] [[package]] @@ -697,15 +848,21 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.167" +version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" + +[[package]] +name = "litemap" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856" [[package]] name = "lock_api" @@ -783,7 +940,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.48.0", ] @@ -860,9 +1017,9 @@ checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" -version = "0.10.64" +version = "0.10.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" dependencies = [ "bitflags 2.5.0", "cfg-if", @@ -901,9 +1058,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.102" +version = "0.9.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" dependencies = [ "cc", "libc", @@ -1018,26 +1175,6 @@ dependencies = [ "siphasher", ] -[[package]] -name = "pin-project" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "pin-project-lite" version = "0.2.14" @@ -1140,7 +1277,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.14", ] [[package]] @@ -1163,9 +1300,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.6" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -1175,9 +1312,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -1186,15 +1323,15 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "reqwest" -version = "0.12.7" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ "async-compression", "base64", @@ -1224,6 +1361,7 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-util", + "tower", "tower-service", "url", "wasm-bindgen", @@ -1240,9 +1378,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustix" -version = "0.38.41" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "dade4812df5c384711475be5fcd8c162555352945401aed22a35bffeab61f657" dependencies = [ "bitflags 2.5.0", "errno", @@ -1396,6 +1534,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "string_cache" version = "0.8.7" @@ -1448,14 +1592,26 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tempfile" -version = "3.14.0" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567" dependencies = [ "cfg-if", "fastrand", + "getrandom 0.3.1", "once_cell", "rustix", "windows-sys 0.52.0", @@ -1494,20 +1650,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] -name = "tinyvec" -version = "1.6.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - [[package]] name = "tokio" version = "1.37.0" @@ -1548,14 +1699,14 @@ dependencies = [ [[package]] name = "tower" -version = "0.4.13" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", - "pin-project", "pin-project-lite", + "sync_wrapper", "tokio", "tower-layer", "tower-service", @@ -1569,9 +1720,9 @@ checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -1604,32 +1755,17 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] - [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -1642,6 +1778,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1678,6 +1826,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.92" @@ -1794,6 +1951,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-link" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dccfd733ce2b1753b03b6d3c65edf020262ea35e20ccdf3e288043e6dd620e3" + [[package]] name = "windows-registry" version = "0.2.0" @@ -1963,6 +2126,27 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xml5ever" version = "0.18.0" @@ -1973,3 +2157,70 @@ dependencies = [ "mac", "markup5ever", ] + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 9fc9bd35..f38a645a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,28 +25,28 @@ license = "CC0-1.0" [dependencies] atty = "0.2.14" # Used for highlighting network errors base64 = "0.22.1" # Used for integrity attributes -chrono = "0.4.38" # Used for formatting output timestamp +chrono = "0.4.40" # Used for formatting output timestamp clap = { version = "3.2.25", optional = true } # Used for processing CLI arguments cssparser = "0.34.0" # Used for dealing with CSS -encoding_rs = "0.8.34" # Used for parsing and converting document charsets +encoding_rs = "0.8.35" # Used for parsing and converting document charsets html5ever = "0.27.0" # Used for all things DOM markup5ever_rcdom = "0.3.0" # Used for manipulating DOM percent-encoding = "2.3.1" # Used for encoding URLs sha2 = "0.10.8" # Used for calculating checksums during integrity checks redb = "2.4.0" # Used for on-disk caching of downloaded assets -tempfile = { version = "3.14.0", optional = true } # Used for on-disk caching of downloaded assets -url = "2.5.2" # Used for parsing URLs -openssl = "0.10.64" # Used for static linking of the OpenSSL library +tempfile = { version = "3.18.0", optional = true } # Used for on-disk caching of downloaded assets +url = "2.5.4" # Used for parsing URLs +openssl = "0.10.71" # Used for static linking of the OpenSSL library # Used for parsing srcset and NOSCRIPT [dependencies.regex] -version = "1.10.6" +version = "1.11.1" default-features = false features = ["std", "perf-dfa", "unicode-perl"] # Used for making network requests [dependencies.reqwest] -version = "0.12.7" +version = "0.12.12" default-features = false features = ["default-tls", "blocking", "gzip", "brotli", "deflate"] diff --git a/src/utils.rs b/src/core.rs similarity index 51% rename from src/utils.rs rename to src/core.rs index cf509744..445ddba4 100644 --- a/src/utils.rs +++ b/src/core.rs @@ -1,16 +1,90 @@ -use reqwest::blocking::Client; -use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER}; use std::fs; +use std::io::{self, prelude::*, Error, Write}; use std::path::{Path, PathBuf}; +use std::process; +use std::time::Duration; + +use encoding_rs::Encoding; +use markup5ever_rcdom::RcDom; +use reqwest::blocking::Client; +use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; use url::Url; use crate::cache::Cache; -use crate::opts::Options; -use crate::url::{clean_url, get_referer_url, parse_data_url}; +use crate::cookies::Cookie; +use crate::html::{ + add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom, + serialize_document, set_base_url, set_charset, walk_and_embed_assets, +}; +use crate::url::{clean_url, create_data_url, get_referer_url, parse_data_url, resolve_url}; + +#[derive(Default)] +pub struct Options { + pub base_url: Option, + pub blacklist_domains: bool, + // pub cache: Option, + pub cookies: Vec, + pub domains: Option>, + pub encoding: Option, + pub ignore_errors: bool, + pub insecure: bool, + pub isolate: bool, + pub no_audio: bool, + pub no_color: bool, + pub no_css: bool, + pub no_fonts: bool, + pub no_frames: bool, + pub no_images: bool, + pub no_js: bool, + pub no_metadata: bool, + pub no_video: bool, + pub output: String, + pub silent: bool, + pub target: String, + pub timeout: u64, + pub unwrap_noscript: bool, + pub user_agent: Option, +} + +enum Output { + Stdout(io::Stdout), + File(fs::File), +} + +impl Output { + fn new(file_path: &str) -> Result { + if file_path.is_empty() || file_path.eq("-") { + Ok(Output::Stdout(io::stdout())) + } else { + Ok(Output::File(fs::File::create(file_path)?)) + } + } + + fn write(&mut self, bytes: &Vec) -> Result<(), Error> { + match self { + Output::Stdout(stdout) => { + stdout.write_all(bytes)?; + // Ensure newline at end of output + if bytes.last() != Some(&b"\n"[0]) { + stdout.write(b"\n")?; + } + stdout.flush() + } + Output::File(file) => { + file.write_all(bytes)?; + // Ensure newline at end of output + if bytes.last() != Some(&b"\n"[0]) { + file.write(b"\n")?; + } + file.flush() + } + } + } +} const ANSI_COLOR_RED: &'static str = "\x1b[31m"; const ANSI_COLOR_RESET: &'static str = "\x1b[0m"; -const MAGIC: [[&[u8]; 2]; 18] = [ +const FILE_SIGNATURES: [[&[u8]; 2]; 18] = [ // Image [b"GIF87a", b"image/gif"], [b"GIF89a", b"image/gif"], @@ -39,11 +113,278 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ "image/svg+xml", ]; +pub fn create_monolithic_file(options: &Options, mut cache: &mut Cache) { + // Check if target was provided + if options.target.len() == 0 { + if !options.silent { + eprintln!("No target specified"); + } + process::exit(1); + } + + // Check if custom encoding value is acceptable + if let Some(custom_encoding) = options.encoding.clone() { + if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() { + eprintln!("Unknown encoding: {}", &custom_encoding); + process::exit(1); + } + } + + let mut use_stdin: bool = false; + + let target_url = match options.target.as_str() { + "-" => { + // Read from pipe (stdin) + use_stdin = true; + // Set default target URL to an empty data URL; the user can set it via --base-url + Url::parse("data:text/html,").unwrap() + } + target => match Url::parse(&target) { + Ok(url) => match url.scheme() { + "data" | "file" | "http" | "https" => url, + unsupported_scheme => { + if !options.silent { + eprintln!("Unsupported target URL type: {}", unsupported_scheme); + } + process::exit(1) + } + }, + Err(_) => { + // Failed to parse given base URL (perhaps it's a filesystem path?) + let path: &Path = Path::new(&target); + match path.exists() { + true => match path.is_file() { + true => { + let canonical_path = fs::canonicalize(&path).unwrap(); + match Url::from_file_path(canonical_path) { + Ok(url) => url, + Err(_) => { + if !options.silent { + eprintln!( + "Could not generate file URL out of given path: {}", + &target + ); + } + process::exit(1); + } + } + } + false => { + if !options.silent { + eprintln!("Local target is not a file: {}", &target); + } + process::exit(1); + } + }, + false => { + // It is not a FS path, now we do what browsers do: + // prepend "http://" and hope it points to a website + Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target)) + .unwrap() + } + } + } + }, + }; + + // Initialize HTTP client + let mut header_map = HeaderMap::new(); + if let Some(user_agent) = &options.user_agent { + header_map.insert( + USER_AGENT, + HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"), + ); + } + let client = Client::builder() + .timeout(Duration::from_secs(if options.timeout > 0 { + options.timeout + } else { + // We have to specify something that eventually makes the program fail + // (to prevent it from hanging forever) + 600 + })) + .danger_accept_invalid_certs(options.insecure) + .default_headers(header_map) + .build() + .expect("Failed to initialize HTTP client"); + + // At first we assume that base URL is same as target URL + let mut base_url: Url = target_url.clone(); + + let data: Vec; + let mut document_encoding: String = "".to_string(); + let mut dom: RcDom; + + // Retrieve target document + if use_stdin { + data = read_stdin(); + } else if target_url.scheme() == "file" + || target_url.scheme() == "http" + || target_url.scheme() == "https" + || target_url.scheme() == "data" + { + match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) { + Ok((retrieved_data, final_url, media_type, charset)) => { + // Provide output as text without processing it, the way browsers do + if !media_type.eq_ignore_ascii_case("text/html") + && !media_type.eq_ignore_ascii_case("application/xhtml+xml") + { + // Define output + let mut output = + Output::new(&options.output).expect("Could not prepare output"); + + // Write retrieved data into STDOUT or file + output + .write(&retrieved_data) + .expect("Could not write output"); + + // Nothing else to do past this point + process::exit(0); + } + + if options + .base_url + .clone() + .unwrap_or("".to_string()) + .is_empty() + { + base_url = final_url; + } + + data = retrieved_data; + document_encoding = charset; + } + Err(_) => { + if !options.silent { + eprintln!("Could not retrieve target document"); + } + process::exit(1); + } + } + } else { + process::exit(1); + } + + // Initial parse + dom = html_to_dom(&data, document_encoding.clone()); + + // TODO: investigate if charset from filesystem/data URL/HTTP headers + // has say over what's specified in HTML + + // Attempt to determine document's charset + if let Some(html_charset) = get_charset(&dom.document) { + if !html_charset.is_empty() { + // Check if the charset specified inside HTML is valid + if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) { + document_encoding = html_charset; + dom = html_to_dom(&data, encoding.name().to_string()); + } + } + } + + // Use custom base URL if specified, read and use what's in the DOM otherwise + let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string()); + if custom_base_url.is_empty() { + // No custom base URL is specified + // Try to see if document has BASE element + if let Some(existing_base_url) = get_base_url(&dom.document) { + base_url = resolve_url(&target_url, &existing_base_url); + } + } else { + // Custom base URL provided + match Url::parse(&custom_base_url) { + Ok(parsed_url) => { + if parsed_url.scheme() == "file" { + // File base URLs can only work with + // documents saved from filesystem + if target_url.scheme() == "file" { + base_url = parsed_url; + } + } else { + base_url = parsed_url; + } + } + Err(_) => { + // Failed to parse given base URL, perhaps it's a filesystem path? + if target_url.scheme() == "file" { + // Relative paths could work for documents saved from filesystem + let path: &Path = Path::new(&custom_base_url); + if path.exists() { + match Url::from_file_path(fs::canonicalize(&path).unwrap()) { + Ok(file_url) => { + base_url = file_url; + } + Err(_) => { + if !options.silent { + eprintln!( + "Could not map given path to base URL: {}", + custom_base_url + ); + } + process::exit(1); + } + } + } + } + } + } + } + + // Traverse through the document and embed remote assets + walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options); + + // Update or add new BASE element to reroute network requests and hash-links + if let Some(new_base_url) = options.base_url.clone() { + dom = set_base_url(&dom.document, new_base_url); + } + + // Request and embed /favicon.ico (unless it's already linked in the document) + if !options.no_images + && (target_url.scheme() == "http" || target_url.scheme() == "https") + && !has_favicon(&dom.document) + { + let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico"); + + match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) { + Ok((data, final_url, media_type, charset)) => { + let favicon_data_url: Url = + create_data_url(&media_type, &charset, &data, &final_url); + dom = add_favicon(&dom.document, favicon_data_url.to_string()); + } + Err(_) => { + // Failed to retrieve /favicon.ico + } + } + } + + // Save using specified charset, if given + if let Some(custom_encoding) = options.encoding.clone() { + document_encoding = custom_encoding; + dom = set_charset(dom, document_encoding.clone()); + } + + // Serialize DOM tree + let mut result: Vec = serialize_document(dom, document_encoding, &options); + + // Prepend metadata comment tag + if !options.no_metadata { + let mut metadata_comment: String = create_metadata_tag(&target_url); + metadata_comment += "\n"; + result.splice(0..0, metadata_comment.as_bytes().to_vec()); + } + + // Define output + let mut output = Output::new(&options.output).expect("Could not prepare output"); + + // Write result into STDOUT or file + output.write(&result).expect("Could not write output"); +} + pub fn detect_media_type(data: &[u8], url: &Url) -> String { // At first attempt to read file's header - for magic_item in MAGIC.iter() { - if data.starts_with(magic_item[0]) { - return String::from_utf8(magic_item[1].to_vec()).unwrap(); + for file_signature in FILE_SIGNATURES.iter() { + if data.starts_with(file_signature[0]) { + return String::from_utf8(file_signature[1].to_vec()).unwrap(); } } @@ -395,3 +736,12 @@ pub fn retrieve_asset( } } } + +pub fn read_stdin() -> Vec { + let mut buffer: Vec = vec![]; + + match io::stdin().lock().read_to_end(&mut buffer) { + Ok(_) => buffer, + Err(_) => buffer, + } +} diff --git a/src/css.rs b/src/css.rs index b2dccbab..9915976c 100644 --- a/src/css.rs +++ b/src/css.rs @@ -5,9 +5,8 @@ use reqwest::blocking::Client; use url::Url; use crate::cache::Cache; -use crate::opts::Options; +use crate::core::{retrieve_asset, Options}; use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL}; -use crate::utils::retrieve_asset; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ // Universal diff --git a/src/html.rs b/src/html.rs index 04cd6e78..41a7fa1e 100644 --- a/src/html.rs +++ b/src/html.rs @@ -15,13 +15,12 @@ use sha2::{Digest, Sha256, Sha384, Sha512}; use std::default::Default; use crate::cache::Cache; +use crate::core::{parse_content_type, retrieve_asset, Options}; use crate::css::embed_css; use crate::js::attr_is_event_handler; -use crate::opts::Options; use crate::url::{ clean_url, create_data_url, is_url_and_has_protocol, resolve_url, EMPTY_IMAGE_DATA_URL, }; -use crate::utils::{parse_content_type, retrieve_asset}; #[derive(PartialEq, Eq)] pub enum LinkType { diff --git a/src/lib.rs b/src/lib.rs index cbe75490..a8325951 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,7 @@ pub mod cache; pub mod cookies; +pub mod core; pub mod css; pub mod html; pub mod js; -pub mod opts; pub mod url; -pub mod utils; diff --git a/src/main.rs b/src/main.rs index d57c0196..929106a6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,155 +1,163 @@ -use std::fs; -use std::io::{self, prelude::*, Error, Write}; -use std::path::Path; +use std::env; +use std::fs::read_to_string; use std::process; -use std::time::Duration; -use encoding_rs::Encoding; -use markup5ever_rcdom::RcDom; -use reqwest::blocking::Client; -use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; +use clap::{App, Arg, ArgAction}; use tempfile::Builder; -use url::Url; use monolith::cache::Cache; use monolith::cookies::parse_cookie_file_contents; -use monolith::html::{ - add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom, - serialize_document, set_base_url, set_charset, walk_and_embed_assets, -}; -use monolith::opts::Options; -use monolith::url::{create_data_url, resolve_url}; -use monolith::utils::retrieve_asset; +use monolith::core::{create_monolithic_file, Options}; + +const ASCII: &'static str = " \ + _____ ______________ __________ ___________________ ___ +| \\ / \\ | | | | | | +| \\_/ __ \\_| __ | | ___ ___ |__| | +| | | | | | | | | | | | +| |\\ /| |__| _ |__| |____| | | | | __ | +| | \\___/ | | \\ | | | | | | | +|___| |__________| \\_____________________| |___| |___| |___| +"; +const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum file size for on-disk caching (in bytes) +const DEFAULT_NETWORK_TIMEOUT: u64 = 120; +const DEFAULT_USER_AGENT: &'static str = + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; +const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; +const ENV_VAR_TERM: &str = "TERM"; -enum Output { - Stdout(io::Stdout), - File(fs::File), -} - -impl Output { - fn new(file_path: &str) -> Result { - if file_path.is_empty() || file_path.eq("-") { - Ok(Output::Stdout(io::stdout())) - } else { - Ok(Output::File(fs::File::create(file_path)?)) +fn main() { + // Process CLI flags and options + let mut cookie_file_path: Option = None; + let mut options: Options = Options::default(); + { + let app = App::new(env!("CARGO_PKG_NAME")) + .version(env!("CARGO_PKG_VERSION")) + .author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str()) + .about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str()) + .args_from_usage("-a, --no-audio 'Remove audio sources'") + .args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'") + .args_from_usage( + "-B, --blacklist-domains 'Treat list of specified domains as blacklist'", + ) + .args_from_usage("-c, --no-css 'Remove CSS'") + .args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'") + .arg( + Arg::with_name("domains") + .short('d') + .long("domain") + .takes_value(true) + .value_name("example.com") + .action(ArgAction::Append) + .help("Specify domains to use for white/black-listing"), + ) + .args_from_usage("-e, --ignore-errors 'Ignore network errors'") + .args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'") + .args_from_usage("-f, --no-frames 'Remove frames and iframes'") + .args_from_usage("-F, --no-fonts 'Remove fonts'") + .args_from_usage("-i, --no-images 'Remove images'") + .args_from_usage("-I, --isolate 'Cut off document from the Internet'") + .args_from_usage("-j, --no-js 'Remove JavaScript'") + .args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'") + .args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'") + .args_from_usage( + "-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'", + ) + .args_from_usage( + "-o, --output=[document.html] 'Write output to , use - for STDOUT'", + ) + .args_from_usage("-s, --silent 'Suppress verbosity'") + .args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") + .args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") + .args_from_usage("-v, --no-video 'Remove video sources'") + .arg( + Arg::with_name("target") + .required(true) + .takes_value(true) + .index(1) + .help("URL or file path, use - for STDIN"), + ) + .get_matches(); + + // Process the command + options.target = app + .value_of("target") + .expect("please set target") + .to_string(); + options.no_audio = app.is_present("no-audio"); + if let Some(base_url) = app.value_of("base-url") { + options.base_url = Some(base_url.to_string()); } - } - - fn write(&mut self, bytes: &Vec) -> Result<(), Error> { - match self { - Output::Stdout(stdout) => { - stdout.write_all(bytes)?; - // Ensure newline at end of output - if bytes.last() != Some(&b"\n"[0]) { - stdout.write(b"\n")?; - } - stdout.flush() - } - Output::File(file) => { - file.write_all(bytes)?; - // Ensure newline at end of output - if bytes.last() != Some(&b"\n"[0]) { - file.write(b"\n")?; - } - file.flush() - } + options.blacklist_domains = app.is_present("blacklist-domains"); + options.no_css = app.is_present("no-css"); + if let Some(cookie_file) = app.value_of("cookies") { + cookie_file_path = Some(cookie_file.to_string()); } - } -} - -pub fn read_stdin() -> Vec { - let mut buffer: Vec = vec![]; - - match io::stdin().lock().read_to_end(&mut buffer) { - Ok(_) => buffer, - Err(_) => buffer, - } -} - -const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum asset file size (in bytes) - -fn main() { - let mut options = Options::from_args(); - - // Check if target was provided - if options.target.len() == 0 { - if !options.silent { - eprintln!("No target specified"); + if let Some(encoding) = app.value_of("encoding") { + options.encoding = Some(encoding.to_string()); } - process::exit(1); - } - - // Check if custom encoding is valid - if let Some(custom_encoding) = options.encoding.clone() { - if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() { - eprintln!("Unknown encoding: {}", &custom_encoding); - process::exit(1); + if let Some(domains) = app.get_many::("domains") { + let list_of_domains: Vec = domains.map(|v| v.clone()).collect::>(); + options.domains = Some(list_of_domains); } - } - - let mut use_stdin: bool = false; - - let target_url = match options.target.as_str() { - "-" => { - // Read from pipe (stdin) - use_stdin = true; - // Set default target URL to an empty data URL; the user can set it via --base-url - Url::parse("data:text/html,").unwrap() + options.ignore_errors = app.is_present("ignore-errors"); + options.no_frames = app.is_present("no-frames"); + options.no_fonts = app.is_present("no-fonts"); + options.no_images = app.is_present("no-images"); + options.isolate = app.is_present("isolate"); + options.no_js = app.is_present("no-js"); + options.insecure = app.is_present("insecure"); + options.no_metadata = app.is_present("no-metadata"); + options.output = app.value_of("output").unwrap_or("").to_string(); + options.silent = app.is_present("silent"); + options.timeout = app + .value_of("timeout") + .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) + .parse::() + .unwrap(); + if let Some(user_agent) = app.value_of("user-agent") { + options.user_agent = Some(user_agent.to_string()); + } else { + options.user_agent = Some(DEFAULT_USER_AGENT.to_string()); } - target => match Url::parse(&target) { - Ok(url) => match url.scheme() { - "data" | "file" | "http" | "https" => url, - unsupported_scheme => { - if !options.silent { - eprintln!("Unsupported target URL type: {}", unsupported_scheme); - } - process::exit(1) - } - }, - Err(_) => { - // Failed to parse given base URL (perhaps it's a filesystem path?) - let path: &Path = Path::new(&target); - match path.exists() { - true => match path.is_file() { - true => { - let canonical_path = fs::canonicalize(&path).unwrap(); - match Url::from_file_path(canonical_path) { - Ok(url) => url, - Err(_) => { - if !options.silent { - eprintln!( - "Could not generate file URL out of given path: {}", - &target - ); - } - process::exit(1); - } - } - } - false => { - if !options.silent { - eprintln!("Local target is not a file: {}", &target); - } - process::exit(1); - } - }, - false => { - // It is not a FS path, now we do what browsers do: - // prepend "http://" and hope it points to a website - Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target)) - .unwrap() - } - } + options.unwrap_noscript = app.is_present("unwrap-noscript"); + options.no_video = app.is_present("no-video"); + + options.no_color = + env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); + if let Some(term) = env::var_os(ENV_VAR_TERM) { + if term == "dumb" { + options.no_color = true; } - }, + } + } + + // Set up cache (attempt to create temporary file) + let temp_cache_file = match Builder::new().prefix("monolith-").tempfile() { + Ok(tempfile) => Some(tempfile), + Err(_) => None, }; + let mut cache = Cache::new( + CACHE_ASSET_FILE_SIZE_THRESHOLD, + if temp_cache_file.is_some() { + Some( + temp_cache_file + .as_ref() + .unwrap() + .path() + .display() + .to_string(), + ) + } else { + None + }, + ); // Read and parse cookie file - if let Some(opt_cookie_file) = options.cookie_file.clone() { - match fs::read_to_string(opt_cookie_file) { + if let Some(opt_cookie_file) = cookie_file_path.clone() { + match read_to_string(opt_cookie_file) { Ok(str) => match parse_cookie_file_contents(&str) { - Ok(cookies) => { - options.cookies = cookies; + Ok(parsed_cookies_from_file) => { + options.cookies = parsed_cookies_from_file; } Err(_) => { eprintln!("Could not parse specified cookie file"); @@ -163,204 +171,5 @@ fn main() { } } - // Initialize client - let temp_file = Builder::new() - .prefix(".monolith-") - .keep(!true) - .tempfile() - .unwrap(); - let mut cache = Cache::new( - CACHE_ASSET_FILE_SIZE_THRESHOLD, - Some(temp_file.path().display().to_string()), - ); - let mut header_map = HeaderMap::new(); - if let Some(user_agent) = &options.user_agent { - header_map.insert( - USER_AGENT, - HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"), - ); - } - let client = if options.timeout > 0 { - Client::builder().timeout(Duration::from_secs(options.timeout)) - } else { - // No timeout is default - Client::builder() - } - .danger_accept_invalid_certs(options.insecure) - .default_headers(header_map) - .build() - .expect("Failed to initialize HTTP client"); - - // At first we assume that base URL is the same as target URL - let mut base_url: Url = target_url.clone(); - - let data: Vec; - let mut document_encoding: String = "".to_string(); - let mut dom: RcDom; - - // Retrieve target document - if use_stdin { - data = read_stdin(); - } else if target_url.scheme() == "file" - || (target_url.scheme() == "http" || target_url.scheme() == "https") - || target_url.scheme() == "data" - { - match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) { - Ok((retrieved_data, final_url, media_type, charset)) => { - // Provide output as text without processing it, the way browsers do - if !media_type.eq_ignore_ascii_case("text/html") - && !media_type.eq_ignore_ascii_case("application/xhtml+xml") - { - // Define output - let mut output = - Output::new(&options.output).expect("Could not prepare output"); - - // Write retrieved data into STDOUT or file - output - .write(&retrieved_data) - .expect("Could not write output"); - - // Nothing else to do past this point - process::exit(0); - } - - if options - .base_url - .clone() - .unwrap_or("".to_string()) - .is_empty() - { - base_url = final_url; - } - - data = retrieved_data; - document_encoding = charset; - } - Err(_) => { - if !options.silent { - eprintln!("Could not retrieve target document"); - } - process::exit(1); - } - } - } else { - process::exit(1); - } - - // Initial parse - dom = html_to_dom(&data, document_encoding.clone()); - - // TODO: investigate if charset from filesystem/data URL/HTTP headers - // has say over what's specified in HTML - - // Attempt to determine document's charset - if let Some(html_charset) = get_charset(&dom.document) { - if !html_charset.is_empty() { - // Check if the charset specified inside HTML is valid - if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) { - document_encoding = html_charset; - dom = html_to_dom(&data, encoding.name().to_string()); - } - } - } - - // Use custom base URL if specified, read and use what's in the DOM otherwise - let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string()); - if custom_base_url.is_empty() { - // No custom base URL is specified - // Try to see if document has BASE element - if let Some(existing_base_url) = get_base_url(&dom.document) { - base_url = resolve_url(&target_url, &existing_base_url); - } - } else { - // Custom base URL provided - match Url::parse(&custom_base_url) { - Ok(parsed_url) => { - if parsed_url.scheme() == "file" { - // File base URLs can only work with - // documents saved from filesystem - if target_url.scheme() == "file" { - base_url = parsed_url; - } - } else { - base_url = parsed_url; - } - } - Err(_) => { - // Failed to parse given base URL, perhaps it's a filesystem path? - if target_url.scheme() == "file" { - // Relative paths could work for documents saved from filesystem - let path: &Path = Path::new(&custom_base_url); - if path.exists() { - match Url::from_file_path(fs::canonicalize(&path).unwrap()) { - Ok(file_url) => { - base_url = file_url; - } - Err(_) => { - if !options.silent { - eprintln!( - "Could not map given path to base URL: {}", - custom_base_url - ); - } - process::exit(1); - } - } - } - } - } - } - } - - // Traverse through the document and embed remote assets - walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options); - - // Update or add new BASE element to reroute network requests and hash-links - if let Some(new_base_url) = options.base_url.clone() { - dom = set_base_url(&dom.document, new_base_url); - } - - // Request and embed /favicon.ico (unless it's already linked in the document) - if !options.no_images - && (target_url.scheme() == "http" || target_url.scheme() == "https") - && !has_favicon(&dom.document) - { - let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico"); - - match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) { - Ok((data, final_url, media_type, charset)) => { - let favicon_data_url: Url = - create_data_url(&media_type, &charset, &data, &final_url); - dom = add_favicon(&dom.document, favicon_data_url.to_string()); - } - Err(_) => { - // Failed to retrieve /favicon.ico - } - } - } - - // Save using specified charset, if given - if let Some(custom_encoding) = options.encoding.clone() { - document_encoding = custom_encoding; - dom = set_charset(dom, document_encoding.clone()); - } - - // Serialize DOM tree - let mut result: Vec = serialize_document(dom, document_encoding, &options); - - // Prepend metadata comment tag - if !options.no_metadata { - let mut metadata_comment: String = create_metadata_tag(&target_url); - metadata_comment += "\n"; - result.splice(0..0, metadata_comment.as_bytes().to_vec()); - } - - // Define output - let mut output = Output::new(&options.output).expect("Could not prepare output"); - - // Write result into STDOUT or file - output.write(&result).expect("Could not write output"); - - // Remove temporary file used for storing cache's database - // drop(temp_file); + create_monolithic_file(&options, &mut cache); } diff --git a/src/opts.rs b/src/opts.rs deleted file mode 100644 index 7a90694b..00000000 --- a/src/opts.rs +++ /dev/null @@ -1,154 +0,0 @@ -use clap::{App, Arg, ArgAction}; -use std::env; - -use crate::cookies::Cookie; - -#[derive(Default)] -pub struct Options { - pub no_audio: bool, - pub base_url: Option, - pub blacklist_domains: bool, - pub no_css: bool, - pub cookie_file: Option, - pub cookies: Vec, - pub domains: Option>, - pub ignore_errors: bool, - pub encoding: Option, - pub no_frames: bool, - pub no_fonts: bool, - pub no_images: bool, - pub isolate: bool, - pub no_js: bool, - pub insecure: bool, - pub no_metadata: bool, - pub output: String, - pub silent: bool, - pub timeout: u64, - pub user_agent: Option, - pub no_video: bool, - pub target: String, - pub no_color: bool, - pub unwrap_noscript: bool, -} - -const ASCII: &'static str = " \ - _____ ______________ __________ ___________________ ___ -| \\ / \\ | | | | | | -| \\_/ __ \\_| __ | | ___ ___ |__| | -| | | | | | | | | | | | -| |\\ /| |__| _ |__| |____| | | | | __ | -| | \\___/ | | \\ | | | | | | | -|___| |__________| \\_____________________| |___| |___| |___| -"; -const DEFAULT_NETWORK_TIMEOUT: u64 = 120; -const DEFAULT_USER_AGENT: &'static str = - "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0"; -const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; -const ENV_VAR_TERM: &str = "TERM"; - -impl Options { - pub fn from_args() -> Options { - let app = App::new(env!("CARGO_PKG_NAME")) - .version(env!("CARGO_PKG_VERSION")) - .author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str()) - .about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str()) - .args_from_usage("-a, --no-audio 'Remove audio sources'") - .args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'") - .args_from_usage( - "-B, --blacklist-domains 'Treat list of specified domains as blacklist'", - ) - .args_from_usage("-c, --no-css 'Remove CSS'") - .args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'") - .arg( - Arg::with_name("domains") - .short('d') - .long("domain") - .takes_value(true) - .value_name("example.com") - .action(ArgAction::Append) - .help("Specify domains to use for white/black-listing"), - ) - .args_from_usage("-e, --ignore-errors 'Ignore network errors'") - .args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'") - .args_from_usage("-f, --no-frames 'Remove frames and iframes'") - .args_from_usage("-F, --no-fonts 'Remove fonts'") - .args_from_usage("-i, --no-images 'Remove images'") - .args_from_usage("-I, --isolate 'Cut off document from the Internet'") - .args_from_usage("-j, --no-js 'Remove JavaScript'") - .args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'") - .args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'") - .args_from_usage( - "-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'", - ) - .args_from_usage( - "-o, --output=[document.html] 'Write output to , use - for STDOUT'", - ) - .args_from_usage("-s, --silent 'Suppress verbosity'") - .args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") - .args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") - .args_from_usage("-v, --no-video 'Remove video sources'") - .arg( - Arg::with_name("target") - .required(true) - .takes_value(true) - .index(1) - .help("URL or file path, use - for STDIN"), - ) - .get_matches(); - let mut options: Options = Options::default(); - - // Process the command - options.target = app - .value_of("target") - .expect("please set target") - .to_string(); - options.no_audio = app.is_present("no-audio"); - if let Some(base_url) = app.value_of("base-url") { - options.base_url = Some(base_url.to_string()); - } - options.blacklist_domains = app.is_present("blacklist-domains"); - options.no_css = app.is_present("no-css"); - if let Some(cookie_file) = app.value_of("cookies") { - options.cookie_file = Some(cookie_file.to_string()); - } - if let Some(encoding) = app.value_of("encoding") { - options.encoding = Some(encoding.to_string()); - } - if let Some(domains) = app.get_many::("domains") { - let list_of_domains: Vec = domains.map(|v| v.clone()).collect::>(); - options.domains = Some(list_of_domains); - } - options.ignore_errors = app.is_present("ignore-errors"); - options.no_frames = app.is_present("no-frames"); - options.no_fonts = app.is_present("no-fonts"); - options.no_images = app.is_present("no-images"); - options.isolate = app.is_present("isolate"); - options.no_js = app.is_present("no-js"); - options.insecure = app.is_present("insecure"); - options.no_metadata = app.is_present("no-metadata"); - options.output = app.value_of("output").unwrap_or("").to_string(); - options.silent = app.is_present("silent"); - options.timeout = app - .value_of("timeout") - .unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string()) - .parse::() - .unwrap(); - if let Some(user_agent) = app.value_of("user-agent") { - options.user_agent = Some(user_agent.to_string()); - } else { - options.user_agent = Some(DEFAULT_USER_AGENT.to_string()); - } - options.unwrap_noscript = app.is_present("unwrap-noscript"); - options.no_video = app.is_present("no-video"); - - options.no_color = - env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); - if let Some(term) = env::var_os(ENV_VAR_TERM) { - if term == "dumb" { - options.no_color = true; - } - } - - options - } -} diff --git a/src/url.rs b/src/url.rs index a418c327..5007db3c 100644 --- a/src/url.rs +++ b/src/url.rs @@ -2,7 +2,7 @@ use base64::prelude::*; use percent_encoding::percent_decode_str; use url::Url; -use crate::utils::{detect_media_type, parse_content_type}; +use crate::core::{detect_media_type, parse_content_type}; pub const EMPTY_IMAGE_DATA_URL: &'static str = "data:image/png,\ %89PNG%0D%0A%1A%0A%00%00%00%0DIHDR%00%00%00%0D%00%00%00%0D%08%04%00%00%00%D8%E2%2C%F7%00%00%00%11IDATx%DAcd%C0%09%18G%A5%28%96%02%00%0A%F8%00%0E%CB%8A%EB%16%00%00%00%00IEND%AEB%60%82"; diff --git a/tests/utils/detect_media_type.rs b/tests/core/detect_media_type.rs similarity index 69% rename from tests/utils/detect_media_type.rs rename to tests/core/detect_media_type.rs index f1eeb093..bd5aa178 100644 --- a/tests/utils/detect_media_type.rs +++ b/tests/core/detect_media_type.rs @@ -9,34 +9,31 @@ mod passing { use reqwest::Url; - use monolith::utils; + use monolith::core::detect_media_type; #[test] fn image_gif87() { let dummy_url: Url = Url::parse("data:,").unwrap(); - assert_eq!(utils::detect_media_type(b"GIF87a", &dummy_url), "image/gif"); + assert_eq!(detect_media_type(b"GIF87a", &dummy_url), "image/gif"); } #[test] fn image_gif89() { let dummy_url: Url = Url::parse("data:,").unwrap(); - assert_eq!(utils::detect_media_type(b"GIF89a", &dummy_url), "image/gif"); + assert_eq!(detect_media_type(b"GIF89a", &dummy_url), "image/gif"); } #[test] fn image_jpeg() { let dummy_url: Url = Url::parse("data:,").unwrap(); - assert_eq!( - utils::detect_media_type(b"\xFF\xD8\xFF", &dummy_url), - "image/jpeg" - ); + assert_eq!(detect_media_type(b"\xFF\xD8\xFF", &dummy_url), "image/jpeg"); } #[test] fn image_png() { let dummy_url: Url = Url::parse("data:,").unwrap(); assert_eq!( - utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url), + detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url), "image/png" ); } @@ -44,17 +41,14 @@ mod passing { #[test] fn image_svg() { let dummy_url: Url = Url::parse("data:,").unwrap(); - assert_eq!( - utils::detect_media_type(b"