From b51a8ed2439e21f2980160bdf6af043e42d55434 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 8 Aug 2025 16:31:53 -0500 Subject: [PATCH 01/23] test(rustfmt): Verify frontmatter is preserved This is to prove that the frontmatter is preserved. The choices in tests is intended for showing the different parts of the proposed Style Guide for frontmatters. --- .../rustfmt/tests/source/frontmatter_compact.rs | 8 ++++++++ .../rustfmt/tests/source/frontmatter_escaped.rs | 13 +++++++++++++ .../rustfmt/tests/source/frontmatter_spaced.rs | 16 ++++++++++++++++ .../rustfmt/tests/target/frontmatter_compact.rs | 8 ++++++++ .../rustfmt/tests/target/frontmatter_escaped.rs | 13 +++++++++++++ .../rustfmt/tests/target/frontmatter_spaced.rs | 16 ++++++++++++++++ 6 files changed, 74 insertions(+) create mode 100644 src/tools/rustfmt/tests/source/frontmatter_compact.rs create mode 100644 src/tools/rustfmt/tests/source/frontmatter_escaped.rs create mode 100644 src/tools/rustfmt/tests/source/frontmatter_spaced.rs create mode 100644 src/tools/rustfmt/tests/target/frontmatter_compact.rs create mode 100644 src/tools/rustfmt/tests/target/frontmatter_escaped.rs create mode 100644 src/tools/rustfmt/tests/target/frontmatter_spaced.rs diff --git a/src/tools/rustfmt/tests/source/frontmatter_compact.rs b/src/tools/rustfmt/tests/source/frontmatter_compact.rs new file mode 100644 index 0000000000000..21d4c6f4b6160 --- /dev/null +++ b/src/tools/rustfmt/tests/source/frontmatter_compact.rs @@ -0,0 +1,8 @@ +#!/usr/bin/env cargo +---identifier +[dependencies] +regex = "1" +--- +#![feature(frontmatter)] + +fn main() {} diff --git a/src/tools/rustfmt/tests/source/frontmatter_escaped.rs b/src/tools/rustfmt/tests/source/frontmatter_escaped.rs new file mode 100644 index 0000000000000..0d02637756682 --- /dev/null +++ b/src/tools/rustfmt/tests/source/frontmatter_escaped.rs @@ -0,0 +1,13 @@ +#!/usr/bin/env cargo +------------ +package.description = """ +Header +----- + +Body +""" +------------ + +#![feature(frontmatter)] + +fn main() {} diff --git a/src/tools/rustfmt/tests/source/frontmatter_spaced.rs b/src/tools/rustfmt/tests/source/frontmatter_spaced.rs new file mode 100644 index 0000000000000..ee0bb81705c49 --- /dev/null +++ b/src/tools/rustfmt/tests/source/frontmatter_spaced.rs @@ -0,0 +1,16 @@ +#!/usr/bin/env cargo + + +--- identifier +[dependencies] +regex = "1" + +--- + + + + + +#![feature(frontmatter)] + +fn main() {} diff --git a/src/tools/rustfmt/tests/target/frontmatter_compact.rs b/src/tools/rustfmt/tests/target/frontmatter_compact.rs new file mode 100644 index 0000000000000..21d4c6f4b6160 --- /dev/null +++ b/src/tools/rustfmt/tests/target/frontmatter_compact.rs @@ -0,0 +1,8 @@ +#!/usr/bin/env cargo +---identifier +[dependencies] +regex = "1" +--- +#![feature(frontmatter)] + +fn main() {} diff --git a/src/tools/rustfmt/tests/target/frontmatter_escaped.rs b/src/tools/rustfmt/tests/target/frontmatter_escaped.rs new file mode 100644 index 0000000000000..0d02637756682 --- /dev/null +++ b/src/tools/rustfmt/tests/target/frontmatter_escaped.rs @@ -0,0 +1,13 @@ +#!/usr/bin/env cargo +------------ +package.description = """ +Header +----- + +Body +""" +------------ + +#![feature(frontmatter)] + +fn main() {} diff --git a/src/tools/rustfmt/tests/target/frontmatter_spaced.rs b/src/tools/rustfmt/tests/target/frontmatter_spaced.rs new file mode 100644 index 0000000000000..ee0bb81705c49 --- /dev/null +++ b/src/tools/rustfmt/tests/target/frontmatter_spaced.rs @@ -0,0 +1,16 @@ +#!/usr/bin/env cargo + + +--- identifier +[dependencies] +regex = "1" + +--- + + + + + +#![feature(frontmatter)] + +fn main() {} From e50fed79a857ccf3ea516fb4d7fdab665b7a573c Mon Sep 17 00:00:00 2001 From: binarycat Date: Sun, 17 Aug 2025 12:35:54 -0500 Subject: [PATCH 02/23] add regression test for #145529 --- tests/rustdoc-ui/lints/invalid-html-tags.rs | 31 +++++++++++++++++++ .../rustdoc-ui/lints/invalid-html-tags.stderr | 14 ++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/rustdoc-ui/lints/invalid-html-tags.rs b/tests/rustdoc-ui/lints/invalid-html-tags.rs index 317f1fd1d4641..b2f516725fa9e 100644 --- a/tests/rustdoc-ui/lints/invalid-html-tags.rs +++ b/tests/rustdoc-ui/lints/invalid-html-tags.rs @@ -121,3 +121,34 @@ pub fn no_error_1() {} /// backslashed \< //~^ ERROR unclosed HTML tag `a` pub fn p() {} + +/// +/// +/// +pub fn no_error_2() {} + +///
+/// +///
+pub fn no_error_3() {} + +/// unfinished ALLOWED_UNCLOSED +/// +///
+/// +//~^ ERROR unclosed HTML tag `img` +pub fn r() {} diff --git a/tests/rustdoc-ui/lints/invalid-html-tags.stderr b/tests/rustdoc-ui/lints/invalid-html-tags.stderr index 9c2bfcf2c3dd7..fc9849ff23cff 100644 --- a/tests/rustdoc-ui/lints/invalid-html-tags.stderr +++ b/tests/rustdoc-ui/lints/invalid-html-tags.stderr @@ -100,5 +100,17 @@ error: unclosed HTML tag `a` LL | /// backslashed \<
| ^^ -error: aborting due to 16 previous errors +error: unclosed HTML tag `img` + --> $DIR/invalid-html-tags.rs:147:5 + | +LL | /// $DIR/invalid-html-tags.rs:152:8 + | +LL | ///

+ | ^^^^ + +error: aborting due to 18 previous errors From 15a8999aedc0e3a0eb7b4e956dd79b3a2f28f2a3 Mon Sep 17 00:00:00 2001 From: binarycat Date: Sun, 17 Aug 2025 11:23:19 -0500 Subject: [PATCH 03/23] refactor rustdoc::invalid_html_tags tag parser previously, this lint did not distinguish between ``, and since the latter should be accepted under html5, the former was also accepted. the parser now also handles multi-line tags and multi-line attributes. --- src/librustdoc/lib.rs | 1 + src/librustdoc/passes/lint/html_tags.rs | 476 +++++++++++------- src/librustdoc/passes/lint/html_tags/tests.rs | 73 +++ tests/rustdoc-ui/lints/invalid-html-tags.rs | 67 ++- .../rustdoc-ui/lints/invalid-html-tags.stderr | 69 ++- 5 files changed, 498 insertions(+), 188 deletions(-) create mode 100644 src/librustdoc/passes/lint/html_tags/tests.rs diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index 28dbd8ba7d3ac..62e1ad2444d05 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -11,6 +11,7 @@ #![feature(file_buffered)] #![feature(format_args_nl)] #![feature(if_let_guard)] +#![feature(iter_advance_by)] #![feature(iter_intersperse)] #![feature(round_char_boundary)] #![feature(rustc_private)] diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index 19cf15d40a3b4..b9c9279daecaf 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -1,9 +1,11 @@ //! Detects invalid HTML (like an unclosed ``) in doc comments. +use std::borrow::Cow; use std::iter::Peekable; use std::ops::Range; use std::str::CharIndices; +use itertools::Itertools as _; use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd}; use rustc_hir::HirId; use rustc_resolve::rustdoc::source_span_for_markdown_range; @@ -101,7 +103,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & }); }; - let mut tags = Vec::new(); + let mut tagp = TagParser::new(); let mut is_in_comment = None; let mut in_code_block = false; @@ -126,70 +128,65 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: & }; let p = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer)) - .into_offset_iter(); + .into_offset_iter() + .coalesce(|a, b| { + // for some reason, pulldown-cmark splits html blocks into separate events for each line. + // we undo this, in order to handle multi-line tags. + match (a, b) { + ((Event::Html(_), ra), (Event::Html(_), rb)) if ra.end == rb.start => { + let merged = ra.start..rb.end; + Ok((Event::Html(Cow::Borrowed(&dox[merged.clone()]).into()), merged)) + } + x => Err(x), + } + }); for (event, range) in p { match event { Event::Start(Tag::CodeBlock(_)) => in_code_block = true, Event::Html(text) | Event::InlineHtml(text) if !in_code_block => { - extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) + tagp.extract_tags(&text, range, &mut is_in_comment, &report_diag) } Event::End(TagEnd::CodeBlock) => in_code_block = false, _ => {} } } - for (tag, range) in tags.iter().filter(|(t, _)| { - let t = t.to_lowercase(); - !ALLOWED_UNCLOSED.contains(&t.as_str()) - }) { - report_diag(format!("unclosed HTML tag `{tag}`"), range, true); - } - if let Some(range) = is_in_comment { report_diag("Unclosed HTML comment".to_string(), &range, false); + } else if let &Some(quote_pos) = &tagp.quote_pos { + let qr = Range { start: quote_pos, end: quote_pos }; + report_diag( + format!("unclosed quoted HTML attribute on tag `{}`", &tagp.tag_name), + &qr, + false, + ); + } else { + if !tagp.tag_name.is_empty() { + report_diag( + format!("incomplete HTML tag `{}`", &tagp.tag_name), + &(tagp.tag_start_pos..dox.len()), + false, + ); + } + for (tag, range) in tagp.tags.iter().filter(|(t, _)| { + let t = t.to_lowercase(); + !is_implicitly_self_closing(&t) + }) { + report_diag(format!("unclosed HTML tag `{tag}`"), range, true); + } } } +/// These tags are interpreted as self-closing if they lack an explicit closing tag. const ALLOWED_UNCLOSED: &[&str] = &[ "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr", ]; -fn drop_tag( - tags: &mut Vec<(String, Range)>, - tag_name: String, - range: Range, - f: &impl Fn(String, &Range, bool), -) { - let tag_name_low = tag_name.to_lowercase(); - if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { - // If the tag is nested inside a "` (the `h2` tag isn't required - // but it helps for the visualization). - f(format!("unopened HTML tag `{tag_name}`"), &range, false); - } +/// Allows constructs like ``, but not ` bool { + ALLOWED_UNCLOSED.contains(&tag_name) } fn extract_path_backwards(text: &str, end_pos: usize) -> Option { @@ -252,151 +249,292 @@ fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool { c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit()) } -fn extract_html_tag( - tags: &mut Vec<(String, Range)>, - text: &str, - range: &Range, - start_pos: usize, - iter: &mut Peekable>, - f: &impl Fn(String, &Range, bool), -) { - let mut tag_name = String::new(); - let mut is_closing = false; - let mut prev_pos = start_pos; +/// Parse html tags to ensure they are well-formed +#[derive(Debug, Clone)] +struct TagParser { + tags: Vec<(String, Range)>, + /// Name of the tag that is being parsed, if we are within a tag. + /// + /// Since the `<` and name of a tag must appear on the same line with no whitespace, + /// if this is the empty string, we are not in a tag. + tag_name: String, + tag_start_pos: usize, + is_closing: bool, + /// `true` if we are within a tag, but not within its name. + in_attrs: bool, + /// If we are in a quoted attribute, what quote char does it use? + /// + /// This needs to be stored in the struct since HTML5 allows newlines in quoted attrs. + quote: Option, + quote_pos: Option, + after_eq: bool, +} - loop { - let (pos, c) = match iter.peek() { - Some((pos, c)) => (*pos, *c), - // In case we reached the of the doc comment, we want to check that it's an - // unclosed HTML tag. For example "/// (prev_pos, '\0'), - }; - prev_pos = pos; - // Checking if this is a closing tag (like `` for ``). - if c == '/' && tag_name.is_empty() { - is_closing = true; - } else if is_valid_for_html_tag_name(c, tag_name.is_empty()) { - tag_name.push(c); - } else { - if !tag_name.is_empty() { - let mut r = Range { start: range.start + start_pos, end: range.start + pos }; - if c == '>' { - // In case we have a tag without attribute, we can consider the span to - // refer to it fully. - r.end += 1; +impl TagParser { + fn new() -> Self { + Self { + tags: Vec::new(), + tag_name: String::with_capacity(8), + tag_start_pos: 0, + is_closing: false, + in_attrs: false, + quote: None, + quote_pos: None, + after_eq: false, + } + } + + fn drop_tag(&mut self, range: Range, f: &impl Fn(String, &Range, bool)) { + let tag_name_low = self.tag_name.to_lowercase(); + if let Some(pos) = self.tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { + // If the tag is nested inside a "` (the `h2` tag isn't required + // but it helps for the visualization). + f(format!("unopened HTML tag `{}`", &self.tag_name), &range, false); + } + } + + /// Handle a `<` that appeared while parsing a tag. + fn handle_lt_in_tag( + &mut self, + range: Range, + lt_pos: usize, + f: &impl Fn(String, &Range, bool), + ) { + let global_pos = range.start + lt_pos; + // is this check needed? + if global_pos == self.tag_start_pos { + // `<` is in the tag because it is the start. + return; + } + // tried to start a new tag while in a tag + f( + format!("incomplete HTML tag `{}`", &self.tag_name), + &(self.tag_start_pos..global_pos), + false, + ); + self.tag_parsed(); + } + + fn extract_html_tag( + &mut self, + text: &str, + range: &Range, + start_pos: usize, + iter: &mut Peekable>, + f: &impl Fn(String, &Range, bool), + ) { + let mut prev_pos = start_pos; + + 'outer_loop: loop { + let (pos, c) = match iter.peek() { + Some((pos, c)) => (*pos, *c), + // In case we reached the of the doc comment, we want to check that it's an + // unclosed HTML tag. For example "/// (prev_pos, '\0'), + None => break, + }; + prev_pos = pos; + if c == '/' && self.tag_name.is_empty() { + // Checking if this is a closing tag (like `` for ``). + self.is_closing = true; + } else if !self.in_attrs && is_valid_for_html_tag_name(c, self.tag_name.is_empty()) { + self.tag_name.push(c); + } else { + if !self.tag_name.is_empty() { + self.in_attrs = true; + let mut r = Range { start: range.start + start_pos, end: range.start + pos }; + if c == '>' { + // In case we have a tag without attribute, we can consider the span to + // refer to it fully. + r.end += 1; + } + if self.is_closing { + // In case we have "" or even "". + if c != '>' { if !c.is_whitespace() { - if c == '>' { - r.end = range.start + new_pos + 1; - found = true; - } + // It seems like it's not a valid HTML tag. break; } - } - if !found { - break; - } - } - drop_tag(tags, tag_name, r, f); - } else { - let mut is_self_closing = false; - let mut quote_pos = None; - if c != '>' { - let mut quote = None; - let mut after_eq = false; - for (i, c) in text[pos..].char_indices() { - if !c.is_whitespace() { - if let Some(q) = quote { - if c == q { - quote = None; - quote_pos = None; - after_eq = false; + let mut found = false; + for (new_pos, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + if c == '>' { + r.end = range.start + new_pos + 1; + found = true; + } else if c == '<' { + self.handle_lt_in_tag(range.clone(), pos + new_pos, f); } - } else if c == '>' { break; - } else if c == '/' && !after_eq { - is_self_closing = true; - } else { - if is_self_closing { - is_self_closing = false; - } - if (c == '"' || c == '\'') && after_eq { - quote = Some(c); - quote_pos = Some(pos + i); - } else if c == '=' { - after_eq = true; - } } - } else if quote.is_none() { - after_eq = false; + } + if !found { + break 'outer_loop; } } - } - if let Some(quote_pos) = quote_pos { - let qr = Range { start: quote_pos, end: quote_pos }; - f( - format!("unclosed quoted HTML attribute on tag `{tag_name}`"), - &qr, - false, - ); - } - if is_self_closing { - // https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus - let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..]) - || tags.iter().take(pos + 1).any(|(at, _)| { - let at = at.to_lowercase(); - at == "svg" || at == "math" - }); - if !valid { - f(format!("invalid self-closing HTML tag `{tag_name}`"), &r, false); - } + self.drop_tag(r, f); + self.tag_parsed(); } else { - tags.push((tag_name, r)); + self.extract_opening_tag(text, range, r, pos, c, iter, f) } } + break; } - break; + iter.next(); } - iter.next(); } -} - -fn extract_tags( - tags: &mut Vec<(String, Range)>, - text: &str, - range: Range, - is_in_comment: &mut Option>, - f: &impl Fn(String, &Range, bool), -) { - let mut iter = text.char_indices().peekable(); - while let Some((start_pos, c)) = iter.next() { - if is_in_comment.is_some() { - if text[start_pos..].starts_with("-->") { - *is_in_comment = None; + fn extract_opening_tag( + &mut self, + text: &str, + range: &Range, + r: Range, + pos: usize, + c: char, + iter: &mut Peekable>, + f: &impl Fn(String, &Range, bool), + ) { + // we can store this as a local, since html5 does require the `/` and `>` + // to not be separated by whitespace. + let mut is_self_closing = false; + if c != '>' { + 'parse_til_gt: { + for (i, c) in text[pos..].char_indices() { + if !c.is_whitespace() { + debug_assert_eq!(self.quote_pos.is_some(), self.quote.is_some()); + if let Some(q) = self.quote { + if c == q { + self.quote = None; + self.quote_pos = None; + self.after_eq = false; + } + } else if c == '>' { + break 'parse_til_gt; + } else if c == '<' { + self.handle_lt_in_tag(range.clone(), pos + i, f); + } else if c == '/' && !self.after_eq { + is_self_closing = true; + } else { + if is_self_closing { + is_self_closing = false; + } + if (c == '"' || c == '\'') && self.after_eq { + self.quote = Some(c); + self.quote_pos = Some(pos + i); + } else if c == '=' { + self.after_eq = true; + } + } + } else if self.quote.is_none() { + self.after_eq = false; + } + if !is_self_closing && !self.tag_name.is_empty() { + iter.next(); + } + } + // if we've run out of text but still haven't found a `>`, + // return early without calling `tag_parsed` or emitting lints. + // this allows us to either find the `>` in a later event + // or emit a lint about it being missing. + return; } - } else if c == '<' { - if text[start_pos..].starts_with("") { + *is_in_comment = None; + } + } else if c == '<' { + // " @@ -105,7 +106,7 @@ pub fn j() {} /// uiapp.run(&env::args().collect::>()); /// ``` /// -/// shouldn't warn! +// shouldn't warn! /// `````` pub fn k() {} @@ -141,14 +142,72 @@ pub fn no_error_2() {} /// pub fn no_error_3() {} +/// >

class="foo"> +/// >
+pub fn no_error_4() {} + /// unfinished ALLOWED_UNCLOSED /// +/// note: CommonMark doesn't allow an html block to start with a multiline tag, +/// so we use `
` a bunch to force these to be parsed as html blocks. +/// ///
/// -//~^ ERROR unclosed HTML tag `img` +//~^ ERROR incomplete HTML tag `img` pub fn r() {} + +/// >
+/// > href="#broken" +pub fn s() {} + +///
+/// +//~^ ERROR incomplete HTML tag `br` +pub fn t() {} + +///
+///
html5 allows this
+pub fn no_error_5() {} + +///
+/// +pub fn no_error_6() {} + +///
+/// what +pub fn no_error_7() {} + +/// Technically this is allowed per the html5 spec, +/// but there's basically no legitemate reason to do it, +/// so we don't allow it. +/// +///

foobar

+//~^ ERROR Unclosed HTML comment +//~| ERROR incomplete HTML tag `p` +pub fn v() {} diff --git a/tests/rustdoc-ui/lints/invalid-html-tags.stderr b/tests/rustdoc-ui/lints/invalid-html-tags.stderr index fc9849ff23cff..b6ec22c247901 100644 --- a/tests/rustdoc-ui/lints/invalid-html-tags.stderr +++ b/tests/rustdoc-ui/lints/invalid-html-tags.stderr @@ -52,6 +52,12 @@ error: unclosed HTML tag `p` LL | ///

| ^^^ +error: incomplete HTML tag `script` + --> $DIR/invalid-html-tags.rs:45:5 + | +LL | ///