Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-netbsd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
with:
usesh: true
prepare: |
/usr/sbin/pkg_add rust mktools gmake pkgconf cwrappers
/usr/sbin/pkg_add cwrappers gmake mktools pkgconf rust
run: |
cargo build --all --locked --verbose --no-default-features --features cli
cargo test --all --locked --verbose --no-default-features --features cli
77 changes: 27 additions & 50 deletions src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ impl Error for MonolithError {
pub struct Options {
pub base_url: Option<String>,
pub blacklist_domains: bool,
// pub cache: Option<Cache>,
pub cookies: Vec<Cookie>,
pub cookies: Vec<Cookie>, // TODO: move out of this struct
pub domains: Option<Vec<String>>,
pub encoding: Option<String>,
pub ignore_errors: bool,
Expand All @@ -64,9 +63,7 @@ pub struct Options {
pub no_js: bool,
pub no_metadata: bool,
pub no_video: bool,
pub output: String,
pub silent: bool,
pub target: String,
pub timeout: u64,
pub unwrap_noscript: bool,
pub user_agent: Option<String>,
Expand Down Expand Up @@ -104,30 +101,28 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
];

pub fn create_monolithic_document(
source: String,
options: &Options,
mut cache: &mut Cache,
mut cache: &mut Cache, // TODO: make it Option-al
) -> Result<Vec<u8>, MonolithError> {
// Check if target was provided
if options.target.len() == 0 {
if !options.silent {
eprintln!("No target specified");
}

// Check if source was provided
if source.len() == 0 {
return Err(MonolithError::new("no target specified"));
}

// Check if custom encoding value is acceptable
if let Some(custom_encoding) = options.encoding.clone() {
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_encoding);

return Err(MonolithError::new("unknown encoding specified"));
return Err(MonolithError::new(&format!(
"unknown encoding \"{}\"",
&custom_encoding
)));
}
}

let mut use_stdin: bool = false;

let target_url = match options.target.as_str() {
let target_url = match source.as_str() {
"-" => {
// Read from pipe (stdin)
use_stdin = true;
Expand All @@ -138,11 +133,10 @@ pub fn create_monolithic_document(
Ok(url) => match url.scheme() {
"data" | "file" | "http" | "https" => url,
unsupported_scheme => {
if !options.silent {
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
}

return Err(MonolithError::new("unsupported target URL type"));
return Err(MonolithError::new(&format!(
"unsupported target URL scheme \"{}\"",
unsupported_scheme
)));
}
},
Err(_) => {
Expand All @@ -155,32 +149,24 @@ pub fn create_monolithic_document(
match Url::from_file_path(canonical_path) {
Ok(url) => url,
Err(_) => {
if !options.silent {
eprintln!(
"Could not generate file URL out of given path: {}",
&target
);
}

return Err(MonolithError::new(
"could not generate file URL out of given path",
));
return Err(MonolithError::new(&format!(
"could not generate file URL out of given path \"{}\"",
&target
)));
}
}
}
false => {
if !options.silent {
eprintln!("Local target is not a file: {}", &target);
}

return Err(MonolithError::new("local target is not a file"));
return Err(MonolithError::new(&format!(
"local target \"{}\" is not a file",
&target
)));
}
},
false => {
// It is not a FS path, now we do what browsers do:
// prepend "http://" and hope it points to a website
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
.unwrap()
Url::parse(&format!("http://{}", &target)).unwrap()
}
}
}
Expand Down Expand Up @@ -245,10 +231,6 @@ pub fn create_monolithic_document(
document_encoding = charset;
}
Err(_) => {
if !options.silent {
eprintln!("Could not retrieve target document");
}

return Err(MonolithError::new("could not retrieve target document"));
}
}
Expand Down Expand Up @@ -306,15 +288,10 @@ pub fn create_monolithic_document(
base_url = file_url;
}
Err(_) => {
if !options.silent {
eprintln!(
"Could not map given path to base URL: {}",
custom_base_url
);
}
return Err(MonolithError::new(
"could not map given path to base URL",
));
return Err(MonolithError::new(&format!(
"could not map given path to base URL \"{}\"",
custom_base_url
)));
}
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ pub fn create_metadata_tag(url: &Url) -> String {
format!(
"<!-- Saved from {} at {} using {} v{} -->",
if clean_url.scheme() == "http" || clean_url.scheme() == "https" {
&clean_url.as_str()
clean_url.as_str()
} else {
"local source"
},
Expand Down Expand Up @@ -357,7 +357,7 @@ pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle

pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
match &node.data {
NodeData::Element { ref attrs, .. } => {
NodeData::Element { attrs, .. } => {
for attr in attrs.borrow().iter() {
if &*attr.name.local == attr_name {
return Some(attr.value.to_string());
Expand All @@ -371,7 +371,7 @@ pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {

pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
match &node.data {
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
NodeData::Element { name, .. } => Some(name.local.as_ref()),
_ => None,
}
}
Expand Down Expand Up @@ -534,7 +534,7 @@ pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom {

pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>) {
match &node.data {
NodeData::Element { ref attrs, .. } => {
NodeData::Element { attrs, .. } => {
let attrs_mut = &mut attrs.borrow_mut();
let mut i = 0;
let mut found_existing_attr: bool = false;
Expand Down
24 changes: 15 additions & 9 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ enum Output {
}

impl Output {
fn new(file_path: &str) -> Result<Output, IoError> {
if file_path.is_empty() || file_path.eq("-") {
fn new(destination: &str) -> Result<Output, IoError> {
if destination.is_empty() || destination.eq("-") {
Ok(Output::Stdout(io::stdout()))
} else {
Ok(Output::File(fs::File::create(file_path)?))
Ok(Output::File(fs::File::create(destination)?))
}
}

Expand Down Expand Up @@ -58,14 +58,16 @@ const ASCII: &'static str = " \
const CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 50; // Minimum file size for on-disk caching (in bytes)
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0";
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
const ENV_VAR_TERM: &str = "TERM";

fn main() {
// Process CLI flags and options
let mut cookie_file_path: Option<String> = None;
let mut options: Options = Options::default();
let source;
let destination;
{
let app = App::new(env!("CARGO_PKG_NAME"))
.version(env!("CARGO_PKG_VERSION"))
Expand Down Expand Up @@ -116,7 +118,7 @@ fn main() {
.get_matches();

// Process the command
options.target = app
source = app
.value_of("target")
.expect("please set target")
.to_string();
Expand Down Expand Up @@ -144,7 +146,7 @@ fn main() {
options.no_js = app.is_present("no-js");
options.insecure = app.is_present("insecure");
options.no_metadata = app.is_present("no-metadata");
options.output = app.value_of("output").unwrap_or("").to_string();
destination = app.value_of("output").unwrap_or("").to_string();
options.silent = app.is_present("silent");
options.timeout = app
.value_of("timeout")
Expand Down Expand Up @@ -208,15 +210,19 @@ fn main() {
}
}

match create_monolithic_document(&options, &mut cache) {
match create_monolithic_document(source, &options, &mut cache) {
Ok(result) => {
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
let mut output = Output::new(&destination).expect("Could not prepare output");

// Write result into STDOUT or file
output.write(&result).expect("Could not write output");
}
Err(_) => {
Err(error) => {
if !options.silent {
eprintln!("Error: {}", error);
}

process::exit(1);
}
}
Expand Down
20 changes: 19 additions & 1 deletion tests/cli/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,25 @@ mod failing {
// STDERR should contain error description
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"No target specified\n"
"Error: no target specified\n"
);

// STDOUT should be empty
assert_eq!(String::from_utf8_lossy(&out.stdout), "");

// Exit code should be 1
out.assert().code(1);
}

#[test]
fn unsupported_scheme() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("mailto:snshn@tutanota.com").output().unwrap();

// STDERR should contain error description
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"Error: unsupported target URL scheme \"mailto\"\n"
);

// STDOUT should be empty
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/unusual_encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ mod passing {
// STDERR should contain error message
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"Unknown encoding: utf0\n"
"Error: unknown encoding \"utf0\"\n"
);

// STDOUT should be empty
Expand Down
5 changes: 0 additions & 5 deletions tests/core/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ mod passing {
assert_eq!(options.no_audio, false);
assert_eq!(options.base_url, None);
assert_eq!(options.no_css, false);
// assert_eq!(options.cache, None);
// assert_eq!(options.cookies, None);
assert_eq!(options.encoding, None);
assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false);
Expand All @@ -26,12 +24,9 @@ mod passing {
assert_eq!(options.no_js, false);
assert_eq!(options.insecure, false);
assert_eq!(options.no_metadata, false);
assert_eq!(options.output, "".to_string());
assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, None);
assert_eq!(options.no_video, false);

assert_eq!(options.target, "".to_string());
}
}
5 changes: 4 additions & 1 deletion tests/html/compose_csp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ mod passing {
options.no_images = true;
let csp_content = html::compose_csp(&options);

assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
assert_eq!(
csp_content,
"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;"
);
}
}
2 changes: 1 addition & 1 deletion tests/html/get_node_attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ mod passing {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
NodeData::Element { name, .. } => {
let node_name = name.local.as_ref().to_string();

if node_name == "body" {
Expand Down
2 changes: 1 addition & 1 deletion tests/html/get_node_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ mod passing {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
NodeData::Element { name, .. } => {
let node_name = name.local.as_ref().to_string();
let parent = html::get_parent_node(node);
let parent_node_name = html::get_node_name(&parent);
Expand Down
18 changes: 3 additions & 15 deletions tests/html/serialize_document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ mod passing {
options.isolate = true;

assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
Expand Down Expand Up @@ -89,11 +85,7 @@ mod passing {
options.no_frames = true;

assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<!DOCTYPE html>\
<html>\
<head>\
Expand Down Expand Up @@ -127,11 +119,7 @@ mod passing {
options.no_images = true;

assert_eq!(
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
)),
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<!DOCTYPE html>\
<html>\
<head>\
Expand Down
Loading