Skip to content

Commit d39864d

Browse files
committed
Auto merge of #96135 - petrochenkov:doclink6, r=GuillaumeGomez
rustdoc: Optimize and refactor doc link resolution

One more subset of #94857 that should bring perf improvements rather than regressions, plus a couple more optimizations on top of it. It's better to read individual commits and their descriptions to understand the changes. The `may_have_doc_links` optimization is not *very* useful here, but it's much more important for #94857. Closes #96079
2 parents 0034bbc + ca5c752 commit d39864d

File tree

17 files changed

+275
-134
lines changed

17 files changed

+275
-134
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -2894,6 +2894,7 @@ dependencies = [
28942894
name = "proc_macro"
28952895
version = "0.0.0"
28962896
dependencies = [
2897+
"core",
28972898
"std",
28982899
]
28992900

compiler/rustc_ast/src/attr/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::token::{self, CommentKind, Token};
99
use crate::tokenstream::{AttrAnnotatedTokenStream, AttrAnnotatedTokenTree};
1010
use crate::tokenstream::{DelimSpan, Spacing, TokenTree, TreeAndSpacing};
1111
use crate::tokenstream::{LazyTokenStream, TokenStream};
12+
use crate::util::comments;
1213

1314
use rustc_index::bit_set::GrowableBitSet;
1415
use rustc_span::source_map::BytePos;
@@ -262,6 +263,10 @@ impl Attribute {
262263
}
263264
}
264265

266+
pub fn may_have_doc_links(&self) -> bool {
267+
self.doc_str().map_or(false, |s| comments::may_have_doc_links(s.as_str()))
268+
}
269+
265270
pub fn get_normal_item(&self) -> &AttrItem {
266271
match self.kind {
267272
AttrKind::Normal(ref item, _) => item,

compiler/rustc_ast/src/util/comments.rs

+8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ pub struct Comment {
2424
pub pos: BytePos,
2525
}
2626

27+
/// A fast conservative estimate on whether the string can contain documentation links.
28+
/// A pair of square brackets `[]` must exist in the string, but we only search for the
29+
/// opening bracket because brackets always go in pairs in practice.
30+
#[inline]
31+
pub fn may_have_doc_links(s: &str) -> bool {
32+
s.contains('[')
33+
}
34+
2735
/// Makes a doc string more presentable to users.
2836
/// Used by rustdoc and perhaps other tools, but not by rustc.
2937
pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol {

compiler/rustc_metadata/src/rmeta/decoder.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1744,6 +1744,10 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
17441744
adjustments: generator_data.adjustments,
17451745
})
17461746
}
1747+
1748+
fn get_may_have_doc_links(self, index: DefIndex) -> bool {
1749+
self.root.tables.may_have_doc_links.get(self, index).is_some()
1750+
}
17471751
}
17481752

17491753
impl CrateMetadata {

compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs

+4
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,10 @@ impl CStore {
531531
) -> impl Iterator<Item = DefId> + '_ {
532532
self.get_crate_data(cnum).get_all_incoherent_impls()
533533
}
534+
535+
pub fn may_have_doc_links_untracked(&self, def_id: DefId) -> bool {
536+
self.get_crate_data(def_id.krate).get_may_have_doc_links(def_id.index)
537+
}
534538
}
535539

536540
impl CrateStore for CStore {

compiler/rustc_metadata/src/rmeta/encoder.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,14 @@ fn should_encode_generics(def_kind: DefKind) -> bool {
977977
}
978978

979979
impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
980+
fn encode_attrs(&mut self, def_id: DefId) {
981+
let attrs = self.tcx.get_attrs(def_id);
982+
record!(self.tables.attributes[def_id] <- attrs);
983+
if attrs.iter().any(|attr| attr.may_have_doc_links()) {
984+
self.tables.may_have_doc_links.set(def_id.index, ());
985+
}
986+
}
987+
980988
fn encode_def_ids(&mut self) {
981989
if self.is_proc_macro {
982990
return;
@@ -989,7 +997,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
989997
let Some(def_kind) = def_kind else { continue };
990998
self.tables.opt_def_kind.set(def_id.index, def_kind);
991999
record!(self.tables.def_span[def_id] <- tcx.def_span(def_id));
992-
record!(self.tables.attributes[def_id] <- tcx.get_attrs(def_id));
1000+
self.encode_attrs(def_id);
9931001
record!(self.tables.expn_that_defined[def_id] <- self.tcx.expn_that_defined(def_id));
9941002
if should_encode_visibility(def_kind) {
9951003
record!(self.tables.visibility[def_id] <- self.tcx.visibility(def_id));
@@ -1651,7 +1659,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
16511659

16521660
self.tables.opt_def_kind.set(LOCAL_CRATE.as_def_id().index, DefKind::Mod);
16531661
record!(self.tables.def_span[LOCAL_CRATE.as_def_id()] <- tcx.def_span(LOCAL_CRATE.as_def_id()));
1654-
record!(self.tables.attributes[LOCAL_CRATE.as_def_id()] <- tcx.get_attrs(LOCAL_CRATE.as_def_id()));
1662+
self.encode_attrs(LOCAL_CRATE.as_def_id());
16551663
record!(self.tables.visibility[LOCAL_CRATE.as_def_id()] <- tcx.visibility(LOCAL_CRATE.as_def_id()));
16561664
if let Some(stability) = stability {
16571665
record!(self.tables.lookup_stability[LOCAL_CRATE.as_def_id()] <- stability);
@@ -1692,7 +1700,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
16921700
let def_id = id.to_def_id();
16931701
self.tables.opt_def_kind.set(def_id.index, DefKind::Macro(macro_kind));
16941702
record!(self.tables.kind[def_id] <- EntryKind::ProcMacro(macro_kind));
1695-
record!(self.tables.attributes[def_id] <- attrs);
1703+
self.encode_attrs(def_id);
16961704
record!(self.tables.def_keys[def_id] <- def_key);
16971705
record!(self.tables.def_ident_span[def_id] <- span);
16981706
record!(self.tables.def_span[def_id] <- span);

compiler/rustc_metadata/src/rmeta/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ define_tables! {
360360
def_path_hashes: Table<DefIndex, DefPathHash>,
361361
proc_macro_quoted_spans: Table<usize, Lazy<Span>>,
362362
generator_diagnostic_data: Table<DefIndex, Lazy<GeneratorDiagnosticData<'tcx>>>,
363+
may_have_doc_links: Table<DefIndex, ()>,
363364
}
364365

365366
#[derive(Copy, Clone, MetadataEncodable, MetadataDecodable)]

compiler/rustc_metadata/src/rmeta/table.rs

+14
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,20 @@ impl FixedSizeEncoding for Option<RawDefId> {
186186
}
187187
}
188188

189+
impl FixedSizeEncoding for Option<()> {
190+
type ByteArray = [u8; 1];
191+
192+
#[inline]
193+
fn from_bytes(b: &[u8; 1]) -> Self {
194+
(b[0] != 0).then(|| ())
195+
}
196+
197+
#[inline]
198+
fn write_to_bytes(self, b: &mut [u8; 1]) {
199+
b[0] = self.is_some() as u8
200+
}
201+
}
202+
189203
// NOTE(eddyb) there could be an impl for `usize`, which would enable a more
190204
// generic `Lazy<T>` impl, but in the general case we might not need / want to
191205
// fit every `usize` in `u32`.

library/proc_macro/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ edition = "2021"
55

66
[dependencies]
77
std = { path = "../std" }
8+
# Workaround: when documenting this crate rustdoc will try to load crate named
9+
# `core` when resolving doc links. Without this line a different `core` will be
10+
# loaded from sysroot causing duplicate lang items and other similar errors.
11+
core = { path = "../core" }

src/librustdoc/clean/types.rs

+32-38
Original file line numberDiff line numberDiff line change
@@ -1089,35 +1089,35 @@ impl Attributes {
10891089
attrs: &[ast::Attribute],
10901090
additional_attrs: Option<(&[ast::Attribute], DefId)>,
10911091
) -> Attributes {
1092-
let mut doc_strings: Vec<DocFragment> = vec![];
1093-
let clean_attr = |(attr, parent_module): (&ast::Attribute, Option<DefId>)| {
1094-
if let Some((value, kind)) = attr.doc_str_and_comment_kind() {
1095-
trace!("got doc_str={:?}", value);
1096-
let value = beautify_doc_string(value, kind);
1092+
// Additional documentation should be shown before the original documentation.
1093+
let attrs1 = additional_attrs
1094+
.into_iter()
1095+
.flat_map(|(attrs, def_id)| attrs.iter().map(move |attr| (attr, Some(def_id))));
1096+
let attrs2 = attrs.iter().map(|attr| (attr, None));
1097+
Attributes::from_ast_iter(attrs1.chain(attrs2), false)
1098+
}
1099+
1100+
crate fn from_ast_iter<'a>(
1101+
attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
1102+
doc_only: bool,
1103+
) -> Attributes {
1104+
let mut doc_strings = Vec::new();
1105+
let mut other_attrs = Vec::new();
1106+
for (attr, parent_module) in attrs {
1107+
if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
1108+
trace!("got doc_str={doc_str:?}");
1109+
let doc = beautify_doc_string(doc_str, comment_kind);
10971110
let kind = if attr.is_doc_comment() {
10981111
DocFragmentKind::SugaredDoc
10991112
} else {
11001113
DocFragmentKind::RawDoc
11011114
};
1102-
1103-
let frag =
1104-
DocFragment { span: attr.span, doc: value, kind, parent_module, indent: 0 };
1105-
1106-
doc_strings.push(frag);
1107-
1108-
None
1109-
} else {
1110-
Some(attr.clone())
1115+
let fragment = DocFragment { span: attr.span, doc, kind, parent_module, indent: 0 };
1116+
doc_strings.push(fragment);
1117+
} else if !doc_only {
1118+
other_attrs.push(attr.clone());
11111119
}
1112-
};
1113-
1114-
// Additional documentation should be shown before the original documentation
1115-
let other_attrs = additional_attrs
1116-
.into_iter()
1117-
.flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id))))
1118-
.chain(attrs.iter().map(|attr| (attr, None)))
1119-
.filter_map(clean_attr)
1120-
.collect();
1120+
}
11211121

11221122
Attributes { doc_strings, other_attrs }
11231123
}
@@ -1138,23 +1138,17 @@ impl Attributes {
11381138
}
11391139

11401140
/// Return the doc-comments on this item, grouped by the module they came from.
1141-
///
11421141
/// The module can be different if this is a re-export with added documentation.
1143-
crate fn collapsed_doc_value_by_module_level(&self) -> FxHashMap<Option<DefId>, String> {
1144-
let mut ret = FxHashMap::default();
1145-
if self.doc_strings.len() == 0 {
1146-
return ret;
1147-
}
1148-
let last_index = self.doc_strings.len() - 1;
1149-
1150-
for (i, new_frag) in self.doc_strings.iter().enumerate() {
1151-
let out = ret.entry(new_frag.parent_module).or_default();
1152-
add_doc_fragment(out, new_frag);
1153-
if i == last_index {
1154-
out.pop();
1155-
}
1142+
///
1143+
/// The last newline is not trimmed so the produced strings are reusable between
1144+
/// early and late doc link resolution regardless of their position.
1145+
crate fn prepare_to_doc_link_resolution(&self) -> FxHashMap<Option<DefId>, String> {
1146+
let mut res = FxHashMap::default();
1147+
for fragment in &self.doc_strings {
1148+
let out_str = res.entry(fragment.parent_module).or_default();
1149+
add_doc_fragment(out_str, fragment);
11561150
}
1157-
ret
1151+
res
11581152
}
11591153

11601154
/// Finds all `doc` attributes as NameValues and returns their corresponding values, joined

src/librustdoc/core.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use rustc_data_structures::sync::{self, Lrc};
44
use rustc_errors::emitter::{Emitter, EmitterWriter};
55
use rustc_errors::json::JsonEmitter;
66
use rustc_feature::UnstableFeatures;
7-
use rustc_hir::def::Res;
7+
use rustc_hir::def::{Namespace, Res};
88
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
99
use rustc_hir::intravisit::{self, Visitor};
1010
use rustc_hir::{HirId, Path, TraitCandidate};
@@ -29,11 +29,14 @@ use crate::clean::inline::build_external_trait;
2929
use crate::clean::{self, ItemId, TraitWithExtraInfo};
3030
use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions};
3131
use crate::formats::cache::Cache;
32+
use crate::passes::collect_intra_doc_links::PreprocessedMarkdownLink;
3233
use crate::passes::{self, Condition::*};
3334

3435
crate use rustc_session::config::{DebuggingOptions, Input, Options};
3536

3637
crate struct ResolverCaches {
38+
crate markdown_links: Option<FxHashMap<String, Vec<PreprocessedMarkdownLink>>>,
39+
crate doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option<Res<NodeId>>>,
3740
/// Traits in scope for a given module.
3841
/// See `collect_intra_doc_links::traits_implemented_by` for more details.
3942
crate traits_in_scope: DefIdMap<Vec<TraitCandidate>>,

src/librustdoc/html/markdown.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,7 @@ crate struct MarkdownLink {
12551255
pub range: Range<usize>,
12561256
}
12571257

1258-
crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {
1258+
crate fn markdown_links<R>(md: &str, filter_map: impl Fn(MarkdownLink) -> Option<R>) -> Vec<R> {
12591259
if md.is_empty() {
12601260
return vec![];
12611261
}
@@ -1295,11 +1295,12 @@ crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {
12951295

12961296
let mut push = |link: BrokenLink<'_>| {
12971297
let span = span_for_link(&link.reference, link.span);
1298-
links.borrow_mut().push(MarkdownLink {
1298+
filter_map(MarkdownLink {
12991299
kind: LinkType::ShortcutUnknown,
13001300
link: link.reference.to_string(),
13011301
range: span,
1302-
});
1302+
})
1303+
.map(|link| links.borrow_mut().push(link));
13031304
None
13041305
};
13051306
let p = Parser::new_with_broken_link_callback(md, main_body_opts(), Some(&mut push))
@@ -1314,7 +1315,8 @@ crate fn markdown_links(md: &str) -> Vec<MarkdownLink> {
13141315
if let Event::Start(Tag::Link(kind, dest, _)) = ev.0 {
13151316
debug!("found link: {dest}");
13161317
let span = span_for_link(&dest, ev.1);
1317-
links.borrow_mut().push(MarkdownLink { kind, link: dest.into_string(), range: span });
1318+
filter_map(MarkdownLink { kind, link: dest.into_string(), range: span })
1319+
.map(|link| links.borrow_mut().push(link));
13181320
}
13191321
}
13201322

0 commit comments

Comments
 (0)