diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index 8c2b521c560d9..53969e731e235 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -167,25 +167,27 @@ impl Path { /// /// Panics if `path` is empty or a segment after the first is `kw::PathRoot`. pub fn join_path_syms(path: impl IntoIterator>) -> String { - // This is a guess at the needed capacity that works well in practice. It is slightly faster - // than (a) starting with an empty string, or (b) computing the exact capacity required. - // `8` works well because it's about the right size and jemalloc's size classes are all - // multiples of 8. - let mut iter = path.into_iter(); - let len_hint = iter.size_hint().1.unwrap_or(1); - let mut s = String::with_capacity(len_hint * 8); - - let first_sym = *iter.next().unwrap().borrow(); - if first_sym != kw::PathRoot { - s.push_str(first_sym.as_str()); - } - for sym in iter { - let sym = *sym.borrow(); - debug_assert_ne!(sym, kw::PathRoot); - s.push_str("::"); - s.push_str(sym.as_str()); - } - s + Symbol::with_interner(|interner| { + // This is a guess at the needed capacity that works well in practice. It is slightly + // faster than (a) starting with an empty string, or (b) computing the exact capacity + // required. `8` works well because it's about the right size and jemalloc's size classes + // are all multiples of 8. + let mut iter = path.into_iter(); + let len_hint = iter.size_hint().1.unwrap_or(1); + + let mut s = String::with_capacity(len_hint * 8); + let first_sym = *iter.next().unwrap().borrow(); + if first_sym != kw::PathRoot { + s.push_str(interner.get_str(first_sym)); + } + for sym in iter { + let sym = *sym.borrow(); + debug_assert_ne!(sym, kw::PathRoot); + s.push_str("::"); + s.push_str(interner.get_str(sym)); + } + s + }) } /// Like `join_path_syms`, but for `Ident`s. This function is necessary because diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 78a4d47ca3346..455324430efbc 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base; /// The number of predefined symbols; this is the first index for - /// extra pre-interned symbols in an Interner created via + /// extra pre-interned symbols in an interner created via /// [`Interner::with_extra_symbols`]. pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count; diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 24e15ded94fef..49920bdd91324 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) { // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4 // (5 - 1) whitespaces. - let Some(min_indent) = docs - .iter() - .map(|fragment| { - fragment - .doc - .as_str() - .lines() - .filter(|line| line.chars().any(|c| !c.is_whitespace())) - .map(|line| { - // Compare against either space or tab, ignoring whether they are - // mixed or not. - let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); - whitespace - + (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }) + let Some(min_indent) = ({ + Symbol::with_interner(|interner| { + docs.iter() + .map(|fragment| { + interner + .get_str(fragment.doc) + .lines() + .filter(|line| line.chars().any(|c| !c.is_whitespace())) + .map(|line| { + // Compare against either space or tab, ignoring whether they are + // mixed or not. + let whitespace = + line.chars().take_while(|c| *c == ' ' || *c == '\t').count(); + whitespace + + (if fragment.kind == DocFragmentKind::SugaredDoc { + 0 + } else { + add + }) + }) + .min() + .unwrap_or(usize::MAX) }) .min() - .unwrap_or(usize::MAX) }) - .min() - else { + }) else { return; }; diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d54175548e30e..f84ffab23cd51 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2632,6 +2632,19 @@ impl Symbol { }) } + /// Runs `f` with access to the symbol interner, so you can call + /// `interner.get_str(sym)` instead of `sym.as_str()`. + /// + /// This is for performance: it lets you get the contents of multiple + /// symbols with a single TLS lookup and interner lock operation, instead + /// of doing those operations once per symbol. + pub fn with_interner(f: impl FnOnce(&InternerInner) -> R) -> R { + with_session_globals(|session_globals| { + let inner = session_globals.symbol_interner.0.lock(); + f(&inner) + }) + } + pub fn as_u32(self) -> u32 { self.0.as_u32() } @@ -2733,14 +2746,13 @@ impl HashStable for ByteSymbol { // string with identical contents (e.g. "foo" and b"foo") are both interned, // only one copy will be stored and the resulting `Symbol` and `ByteSymbol` // will have the same index. +// +// There must only be one of these, otherwise its easy to mix up symbols +// between interners. pub(crate) struct Interner(Lock); // The `&'static [u8]`s in this type actually point into the arena. -// -// This type is private to prevent accidentally constructing more than one -// `Interner` on the same thread, which makes it easy to mix up `Symbol`s -// between `Interner`s. -struct InternerInner { +pub struct InternerInner { arena: DroplessArena, byte_strs: FxIndexSet<&'static [u8]>, } @@ -2794,8 +2806,10 @@ impl Interner { /// Get the symbol as a string. /// /// [`Symbol::as_str()`] should be used in preference to this function. + /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]). fn get_str(&self, symbol: Symbol) -> &str { - let byte_str = self.get_inner(symbol.0.as_usize()); + let inner = self.0.lock(); + let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap(); // SAFETY: known to be a UTF8 string because it's a `Symbol`. unsafe { str::from_utf8_unchecked(byte_str) } } @@ -2803,12 +2817,26 @@ impl Interner { /// Get the symbol as a string. /// /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function. + /// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]). fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { - self.get_inner(symbol.0.as_usize()) + let inner = self.0.lock(); + inner.byte_strs.get_index(symbol.0.as_usize()).unwrap() } +} - fn get_inner(&self, index: usize) -> &[u8] { - self.0.lock().byte_strs.get_index(index).unwrap() +impl InternerInner { + /// Get the symbol as a string. Used with `with_interner`. + #[inline] + pub fn get_str(&self, symbol: Symbol) -> &str { + let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap(); + // SAFETY: known to be a UTF8 string because it's a `Symbol`. + unsafe { str::from_utf8_unchecked(byte_str) } + } + + /// Get the symbol as a string. Used with `with_interner`. + #[inline] + pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { + self.byte_strs.get_index(symbol.0.as_usize()).unwrap() } } diff --git a/src/librustdoc/html/render/print_item.rs b/src/librustdoc/html/render/print_item.rs index 02ee34aaac680..aca2eb53c113e 100644 --- a/src/librustdoc/html/render/print_item.rs +++ b/src/librustdoc/html/render/print_item.rs @@ -12,7 +12,7 @@ use rustc_hir::def_id::DefId; use rustc_index::IndexVec; use rustc_middle::ty::{self, TyCtxt}; use rustc_span::hygiene::MacroKind; -use rustc_span::symbol::{Symbol, sym}; +use rustc_span::symbol::{InternerInner, Symbol, sym}; use tracing::{debug, info}; use super::type_layout::document_type_layout; @@ -333,7 +333,12 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i } } - fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering { + fn cmp( + i1: &clean::Item, + i2: &clean::Item, + interner: &InternerInner, + tcx: TyCtxt<'_>, + ) -> Ordering { let rty1 = reorder(i1.type_()); let rty2 = reorder(i2.type_()); if rty1 != rty2 { @@ -349,7 +354,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i return is_stable2.cmp(&is_stable1); } match (i1.name, i2.name) { - (Some(name1), Some(name2)) => compare_names(name1.as_str(), name2.as_str()), + (Some(name1), Some(name2)) => { + compare_names(interner.get_str(name1), interner.get_str(name2)) + } (Some(_), None) => Ordering::Greater, (None, Some(_)) => Ordering::Less, (None, None) => Ordering::Equal, @@ -360,7 +367,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i match cx.shared.module_sorting { ModuleSorting::Alphabetical => { - not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, tcx)); + Symbol::with_interner(|interner| { + not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, interner, tcx)); + }); } ModuleSorting::DeclarationOrder => {} } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 3c9be29ccc359..6f1765a598b59 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -105,12 +105,16 @@ pub(crate) fn build_index( let mut aliases: BTreeMap> = BTreeMap::new(); // Sort search index items. This improves the compressibility of the search index. - cache.search_index.sort_unstable_by(|k1, k2| { - // `sort_unstable_by_key` produces lifetime errors - // HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go away, too - let k1 = (&k1.path, k1.name.as_str(), &k1.ty, k1.parent.map(|id| (id.index, id.krate))); - let k2 = (&k2.path, k2.name.as_str(), &k2.ty, k2.parent.map(|id| (id.index, id.krate))); - Ord::cmp(&k1, &k2) + Symbol::with_interner(|inner| { + cache.search_index.sort_unstable_by(|k1, k2| { + // `sort_unstable_by_key` produces lifetime errors + // HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go + // away, too + let pair = |k: &IndexItem| k.parent.map(|id| (id.index, id.krate)); + let k1 = (&k1.path, inner.get_str(k1.name), &k1.ty, pair(k1)); + let k2 = (&k2.path, inner.get_str(k2.name), &k2.ty, pair(k2)); + Ord::cmp(&k1, &k2) + }); }); // Set up alias indexes. diff --git a/src/librustdoc/html/url_parts_builder.rs b/src/librustdoc/html/url_parts_builder.rs index 705fa498e8d36..09368d69e455d 100644 --- a/src/librustdoc/html/url_parts_builder.rs +++ b/src/librustdoc/html/url_parts_builder.rs @@ -146,22 +146,17 @@ impl<'a> Extend<&'a str> for UrlPartsBuilder { impl FromIterator for UrlPartsBuilder { fn from_iter>(iter: T) -> Self { - // This code has to be duplicated from the `&str` impl because of - // `Symbol::as_str`'s lifetimes. - let iter = iter.into_iter(); - let mut builder = Self::with_capacity_bytes(AVG_PART_LENGTH * iter.size_hint().0); - iter.for_each(|part| builder.push(part.as_str())); - builder + Symbol::with_interner(|interner| { + UrlPartsBuilder::from_iter(iter.into_iter().map(|sym| interner.get_str(sym))) + }) } } impl Extend for UrlPartsBuilder { fn extend>(&mut self, iter: T) { - // This code has to be duplicated from the `&str` impl because of - // `Symbol::as_str`'s lifetimes. - let iter = iter.into_iter(); - self.buf.reserve(AVG_PART_LENGTH * iter.size_hint().0); - iter.for_each(|part| self.push(part.as_str())); + Symbol::with_interner(|interner| { + self.extend(iter.into_iter().map(|sym| interner.get_str(sym))) + }) } }