Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions examples/builder/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ async fn main() -> Result<()> {
let mut headers = HeaderMap::new();
headers.insert(header::ACCEPT, "text/html".parse().unwrap());

let accepted = Some(HashSet::from_iter(vec![
StatusCode::OK,
StatusCode::NO_CONTENT,
]));
let accepted = HashSet::from_iter(vec![StatusCode::OK, StatusCode::NO_CONTENT]);

let client = ClientBuilder::builder()
.excludes(excludes)
Expand Down
9 changes: 1 addition & 8 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
let remaps = parse_remaps(&cfg.remap)?;
let includes = RegexSet::new(&cfg.include)?;
let excludes = RegexSet::new(&cfg.exclude)?;
let accepted: HashSet<StatusCode> = cfg.accept.clone().try_into()?;

// Offline mode overrides the scheme
let schemes = if cfg.offline {
Expand All @@ -25,14 +26,6 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
cfg.scheme.clone()
};

let accepted = cfg
.accept
.clone()
.into_set()
.iter()
.map(|value| StatusCode::from_u16(*value))
.collect::<Result<HashSet<_>, _>>()?;

let headers = HeaderMap::from_header_pairs(&cfg.header)?;

ClientBuilder::builder()
Expand Down
2 changes: 1 addition & 1 deletion lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ where
let client = params.client;
let cache = params.cache;
let cache_exclude_status = params.cfg.cache_exclude_status.into_set();
let accept = params.cfg.accept.into_set();
let accept = params.cfg.accept.into();

let pb = if params.cfg.no_progress || params.cfg.verbose.log_level() >= log::Level::Info {
None
Expand Down
8 changes: 4 additions & 4 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ mod cli {
.failure()
.code(2)
.stdout(contains(
"[404] https://github.com/mre/idiomatic-rust-doesnt-exist-man | Network error: Not Found"
r#"[404] https://github.com/mre/idiomatic-rust-doesnt-exist-man | Rejected status code (this depends on your "accept" configuration): Not Found"#
))
.stderr(contains(
"There were issues with GitHub URLs. You could try setting a GitHub token and running lychee again.",
Expand Down Expand Up @@ -1118,7 +1118,7 @@ mod cli {
mock_server_no_content.uri()
)))
.stderr(contains(format!(
"[429] {}/ | Network error: Too Many Requests\n",
"[429] {}/ | Rejected status code (this depends on your \"accept\" configuration): Too Many Requests\n",
mock_server_too_many_requests.uri()
)));

Expand Down Expand Up @@ -1176,11 +1176,11 @@ mod cli {
.failure()
.code(2)
.stdout(contains(format!(
"[418] {}/ | Network error: I\'m a teapot",
r#"[418] {}/ | Rejected status code (this depends on your "accept" configuration): I'm a teapot"#,
mock_server_teapot.uri()
)))
.stdout(contains(format!(
"[500] {}/ | Network error: Internal Server Error",
r#"[500] {}/ | Rejected status code (this depends on your "accept" configuration): Internal Server Error"#,
mock_server_server_error.uri()
)));

Expand Down
13 changes: 7 additions & 6 deletions lychee-lib/src/checker/website.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ pub(crate) struct WebsiteChecker {
/// Set of accepted return codes / status codes.
///
/// Unmatched return codes/ status codes are deemed as errors.
accepted: Option<HashSet<StatusCode>>,
accepted: HashSet<StatusCode>,

/// Requires using HTTPS when it's available.
///
Expand All @@ -59,7 +59,7 @@ impl WebsiteChecker {
retry_wait_time: Duration,
max_retries: u64,
reqwest_client: reqwest::Client,
accepted: Option<HashSet<StatusCode>>,
accepted: HashSet<StatusCode>,
github_client: Option<Octocrab>,
require_https: bool,
plugin_request_chain: RequestChain,
Expand Down Expand Up @@ -102,11 +102,12 @@ impl WebsiteChecker {
let method = request.method().clone();
match self.reqwest_client.execute(request).await {
Ok(response) => {
let mut status = Status::new(&response, self.accepted.clone());
let status = Status::new(&response, &self.accepted);
if self.include_fragments && status.is_success() && method == Method::GET {
status = self.check_html_fragment(status, response).await;
self.check_html_fragment(status, response).await
} else {
status
}
status
}
Err(e) => e.into(),
}
Expand All @@ -128,7 +129,7 @@ impl WebsiteChecker {
.await
{
Ok(true) => status,
Ok(false) => Status::Error(ErrorKind::InvalidFragment(url.clone().into())),
Ok(false) => Status::Error(ErrorKind::InvalidFragment(url.into())),
Err(e) => Status::Error(e),
}
}
Expand Down
6 changes: 4 additions & 2 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use crate::{
checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker},
filter::{Excludes, Filter, Includes},
remap::Remaps,
types::DEFAULT_ACCEPTED_STATUS_CODES,
};

/// Default number of redirects before a request is deemed as failed, 5.
Expand Down Expand Up @@ -238,7 +239,8 @@ pub struct ClientBuilder {
/// Set of accepted return codes / status codes.
///
/// Unmatched return codes/ status codes are deemed as errors.
accepted: Option<HashSet<StatusCode>>,
#[builder(default = DEFAULT_ACCEPTED_STATUS_CODES.clone())]
accepted: HashSet<StatusCode>,

/// Response timeout per request in seconds.
timeout: Option<Duration>,
Expand Down Expand Up @@ -361,7 +363,7 @@ impl ClientBuilder {
None => builder,
}
.build()
.map_err(ErrorKind::NetworkRequest)?;
.map_err(ErrorKind::BuildRequestClient)?;
Comment thread
thomas-zahner marked this conversation as resolved.

let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
Some(token) if !token.is_empty() => Some(
Expand Down
2 changes: 2 additions & 0 deletions lychee-lib/src/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ impl RetryExt for ErrorKind {
}) = self.github_error()
{
source.should_retry()
} else if let Self::RejectedStatusCode(StatusCode::TOO_MANY_REQUESTS) = self {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And not quite sure if this the optimal approach

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, not sure about this. Doesn't that mean: "if a user decided to reject a 429 Too Many Requests status code, you should still retry"? To me, that sounds like the user should be in control here; if they decided to consider 429s as hard errors, so be it. 😉 That should be the final answer, and we shouldn't retry. So I'd remove that condition and see where it breaks. (Unless I'm misunderstanding.)

Copy link
Copy Markdown
Member Author

@thomas-zahner thomas-zahner May 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So actually I think we should leave it like this. Otherwise it means that lychee will never retry requests when a 429 occurs. The reason I noticed it in the first place was because without these lines there are test failures. If we don't keep these two lines that would mean that --max-retries would now only apply to the above and below cases. (octocrab and reqwest errors) The reason for this is again the removal of response.error_for_status_ref as explained in my other comment. So previously we entered the if let Some(r) = self.reqwest_error() case.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. Can we add a comment to explain that behavior. I'm sure I'll forget why it's there otherwise. 😅

return true;
} else {
false
}
Expand Down
13 changes: 7 additions & 6 deletions lychee-lib/src/types/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,13 @@ impl From<&Status> for CacheStatus {
Status::Redirected(code) => Self::Error(Some(code.as_u16())),
Status::Timeout(code) => Self::Error(code.map(|code| code.as_u16())),
Status::Error(e) => match e {
ErrorKind::NetworkRequest(e)
| ErrorKind::ReadResponseBody(e)
| ErrorKind::BuildRequestClient(e) => match e.status() {
Some(code) => Self::Error(Some(code.as_u16())),
None => Self::Error(None),
},
ErrorKind::RejectedStatusCode(code) => Self::Error(Some(code.as_u16())),
ErrorKind::ReadResponseBody(e) | ErrorKind::BuildRequestClient(e) => {
match e.status() {
Some(code) => Self::Error(Some(code.as_u16())),
None => Self::Error(None),
}
}
_ => Self::Error(None),
},
}
Expand Down
38 changes: 21 additions & 17 deletions lychee-lib/src/types/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use http::StatusCode;
use serde::{Serialize, Serializer};
use std::error::Error;
use std::hash::Hash;
Expand All @@ -14,7 +15,8 @@ use crate::{Uri, basic_auth::BasicAuthExtractorError, utils};
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum ErrorKind {
/// Network error while handling request
/// Network error while handling request.
/// This does not include erroneous status codes, `RejectedStatusCode` will be used in that case.
#[error("Network error")]
NetworkRequest(#[source] reqwest::Error),
/// Cannot read the body of the received response
Expand Down Expand Up @@ -144,6 +146,10 @@ pub enum ErrorKind {
#[error("Invalid status code: {0}")]
InvalidStatusCode(u16),

/// The given status code was not accepted (this depends on the `accept` configuration)
#[error(r#"Rejected status code (this depends on your "accept" configuration)"#)]
RejectedStatusCode(StatusCode),

/// Regex error
#[error("Error when using regex engine: {0}")]
Regex(#[from] regex::Error),
Expand Down Expand Up @@ -175,25 +181,22 @@ impl ErrorKind {
pub fn details(&self) -> Option<String> {
match self {
ErrorKind::NetworkRequest(e) => {
if let Some(status) = e.status() {
Some(
status
.canonical_reason()
.unwrap_or("Unknown status code")
.to_string(),
)
// Get the relevant details from the specific reqwest error
let details = utils::reqwest::trim_error_output(e);

// Provide support for common error types
if e.is_connect() {
Some(format!("{details} Maybe a certificate error?"))
} else {
// Get the relevant details from the specific reqwest error
let details = utils::reqwest::trim_error_output(e);

// Provide support for common error types
if e.is_connect() {
Some(format!("{details} Maybe a certificate error?"))
} else {
Some(details)
}
Some(details)
}
}
ErrorKind::RejectedStatusCode(status) => Some(
status
.canonical_reason()
.unwrap_or("Unknown status code")
.to_string(),
),
ErrorKind::GithubRequest(e) => {
if let octocrab::Error::GitHub { source, .. } = &**e {
Some(source.message.clone())
Expand Down Expand Up @@ -322,6 +325,7 @@ impl Hash for ErrorKind {
Self::InvalidHeader(e) => e.to_string().hash(state),
Self::InvalidGlobPattern(e) => e.to_string().hash(state),
Self::InvalidStatusCode(c) => c.hash(state),
Self::RejectedStatusCode(c) => c.hash(state),
Self::Channel(e) => e.to_string().hash(state),
Self::MissingGitHubToken | Self::InvalidUrlHost => {
std::mem::discriminant(self).hash(state);
Expand Down
38 changes: 17 additions & 21 deletions lychee-lib/src/types/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,13 @@ impl Serialize for Status {
impl Status {
#[must_use]
/// Create a status object from a response and the set of accepted status codes
pub fn new(response: &Response, accepted: Option<HashSet<StatusCode>>) -> Self {
pub fn new(response: &Response, accepted: &HashSet<StatusCode>) -> Self {
let code = response.status();

if let Some(true) = accepted.map(|a| a.contains(&code)) {
if accepted.contains(&code) {
Self::Ok(code)
} else {
match response.error_for_status_ref() {
Ok(_) if code.is_success() => Self::Ok(code),
Ok(_) if code.is_redirection() => Self::Redirected(code),
Ok(_) => Self::UnknownStatusCode(code),
Err(e) => e.into(),
}
Self::Error(ErrorKind::RejectedStatusCode(code))
}
}

Expand Down Expand Up @@ -217,13 +212,13 @@ impl Status {
| Status::Redirected(code)
| Status::UnknownStatusCode(code)
| Status::Timeout(Some(code)) => Some(*code),
Status::Error(kind) | Status::Unsupported(kind) => {
if let Some(error) = kind.reqwest_error() {
error.status()
} else {
None
}
}
Status::Error(kind) | Status::Unsupported(kind) => match kind {
ErrorKind::RejectedStatusCode(status_code) => Some(*status_code),
_ => match kind.reqwest_error() {
Some(error) => error.status(),
None => None,
},
},
Status::Cached(CacheStatus::Ok(code) | CacheStatus::Error(Some(code))) => {
StatusCode::from_u16(*code).ok()
}
Expand All @@ -240,12 +235,13 @@ impl Status {
}
Status::Excluded => "EXCLUDED".to_string(),
Status::Error(e) => match e {
ErrorKind::NetworkRequest(e)
| ErrorKind::ReadResponseBody(e)
| ErrorKind::BuildRequestClient(e) => match e.status() {
Some(code) => code.as_str().to_string(),
None => "ERROR".to_string(),
},
ErrorKind::RejectedStatusCode(code) => code.as_str().to_string(),
ErrorKind::ReadResponseBody(e) | ErrorKind::BuildRequestClient(e) => {
Comment thread
mre marked this conversation as resolved.
match e.status() {
Some(code) => code.as_str().to_string(),
None => "ERROR".to_string(),
}
}
_ => "ERROR".to_string(),
},
Status::Timeout(code) => match code {
Expand Down
Loading