internal: Don't mutate syntax trees when preparing proc-macro input #10025

Merged (2 commits) on Aug 28, 2021
Changes from all commits
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/hir_expand/Cargo.toml
@@ -14,6 +14,7 @@ log = "0.4.8"
 either = "1.5.3"
 rustc-hash = "1.0.0"
 la-arena = { version = "0.2.0", path = "../../lib/arena" }
+itertools = "0.10.0"
 
 base_db = { path = "../base_db", version = "0.0.0" }
 cfg = { path = "../cfg", version = "0.0.0" }
35 changes: 27 additions & 8 deletions crates/hir_expand/src/db.rs
@@ -3,19 +3,20 @@
 use std::sync::Arc;
 
 use base_db::{salsa, SourceDatabase};
+use itertools::Itertools;
 use limit::Limit;
 use mbe::{ExpandError, ExpandResult};
 use parser::{FragmentKind, T};
 use syntax::{
     algo::diff,
-    ast::{self, NameOwner},
-    AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken,
+    ast::{self, AttrsOwner, NameOwner},
+    AstNode, GreenNode, Parse, SyntaxNode, SyntaxToken, TextRange,
 };
 
 use crate::{
-    ast_id_map::AstIdMap, hygiene::HygieneFrame, input::process_macro_input, BuiltinAttrExpander,
-    BuiltinDeriveExpander, BuiltinFnLikeExpander, HirFileId, HirFileIdRepr, MacroCallId,
-    MacroCallKind, MacroCallLoc, MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
+    ast_id_map::AstIdMap, hygiene::HygieneFrame, BuiltinAttrExpander, BuiltinDeriveExpander,
+    BuiltinFnLikeExpander, HirFileId, HirFileIdRepr, MacroCallId, MacroCallKind, MacroCallLoc,
+    MacroDefId, MacroDefKind, MacroFile, ProcMacroExpander,
 };
 
 /// Total limit on the number of tokens produced by any macro invocation.
@@ -257,9 +258,28 @@ fn parse_macro_expansion(
 
 fn macro_arg(db: &dyn AstDatabase, id: MacroCallId) -> Option<Arc<(tt::Subtree, mbe::TokenMap)>> {
     let arg = db.macro_arg_text(id)?;
-    let (mut tt, tmap) = mbe::syntax_node_to_token_tree(&SyntaxNode::new_root(arg));
+    let loc = db.lookup_intern_macro(id);
+
+    let node = SyntaxNode::new_root(arg);
+    let censor = match loc.kind {
+        MacroCallKind::FnLike { .. } => None,
+        MacroCallKind::Derive { derive_attr_index, .. } => match ast::Item::cast(node.clone()) {
+            Some(item) => item
+                .attrs()
+                .map(|attr| attr.syntax().text_range())
+                .take(derive_attr_index as usize + 1)
+                .fold1(TextRange::cover),
+            None => None,
+        },
+        MacroCallKind::Attr { invoc_attr_index, .. } => match ast::Item::cast(node.clone()) {
+            Some(item) => {
+                item.attrs().nth(invoc_attr_index as usize).map(|attr| attr.syntax().text_range())
+            }
+            None => None,
+        },
+    };
+    let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(&node, censor);
 
-    let loc: MacroCallLoc = db.lookup_intern_macro(id);
     if loc.def.is_proc_macro() {
         // proc macros expect their inputs without parentheses, MBEs expect it with them included
         tt.delimiter = None;
@@ -271,7 +291,6 @@
 fn macro_arg_text(db: &dyn AstDatabase, id: MacroCallId) -> Option<GreenNode> {
     let loc = db.lookup_intern_macro(id);
     let arg = loc.kind.arg(db)?;
-    let arg = process_macro_input(&loc.kind, arg);
 if matches!(loc.kind, MacroCallKind::FnLike { .. }) {
     let first = arg.first_child_or_token().map_or(T![.], |it| it.kind());
     let last = arg.last_child_or_token().map_or(T![.], |it| it.kind());
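For intuition about the `censor` computation above: in the `Derive` case it folds the ranges of the first `derive_attr_index + 1` attributes into one covering range, so everything up to and including the derive attribute itself is censored. Below is a toy, self-contained sketch of that folding with invented offsets; it uses the `text-size` crate directly (the same `TextRange` the `syntax` crate re-exports) and is not code from this PR.

```rust
use itertools::Itertools; // for `fold1`, as in the diff above
use text_size::TextRange;

fn main() {
    // Invented ranges for `#[attr0]`, `#[attr1]`, `#[attr2]` on some item.
    let attr_ranges = [
        TextRange::new(0.into(), 8.into()),
        TextRange::new(9.into(), 17.into()),
        TextRange::new(18.into(), 26.into()),
    ];
    // For a derive attribute at index 1, fold attributes 0..=1 into a
    // single covering range, mirroring `take(..).fold1(TextRange::cover)`
    // in `macro_arg`.
    let derive_attr_index = 1;
    let censor =
        attr_ranges.iter().copied().take(derive_attr_index + 1).fold1(TextRange::cover);
    assert_eq!(censor, Some(TextRange::new(0.into(), 17.into())));
}
```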
120 changes: 0 additions & 120 deletions crates/hir_expand/src/input.rs

This file was deleted.

1 change: 0 additions & 1 deletion crates/hir_expand/src/lib.rs
@@ -14,7 +14,6 @@ pub mod builtin_macro;
 pub mod proc_macro;
 pub mod quote;
 pub mod eager;
-mod input;
 
 use base_db::ProcMacroKind;
 use either::Either;
1 change: 1 addition & 0 deletions crates/mbe/Cargo.toml
@@ -13,6 +13,7 @@ cov-mark = "2.0.0-pre.1"
 rustc-hash = "1.1.0"
 smallvec = "1.2.0"
 log = "0.4.8"
+expect-test = "1.1"
 
 syntax = { path = "../syntax", version = "0.0.0" }
 parser = { path = "../parser", version = "0.0.0" }
2 changes: 1 addition & 1 deletion crates/mbe/src/lib.rs
@@ -67,7 +67,7 @@ impl fmt::Display for ExpandError {
 pub use crate::{
     syntax_bridge::{
         parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
-        token_tree_to_syntax_node,
+        syntax_node_to_token_tree_censored, token_tree_to_syntax_node,
     },
     token_map::TokenMap,
 };
32 changes: 27 additions & 5 deletions crates/mbe/src/syntax_bridge.rs
@@ -1,5 +1,7 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
 
+use std::iter;
+
 use parser::{FragmentKind, ParseError, TreeSink};
 use rustc_hash::FxHashMap;
 use syntax::{
@@ -16,8 +18,17 @@ use crate::{ExpandError, TokenMap};
 /// Convert the syntax node to a `TokenTree` (what macro
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
+    syntax_node_to_token_tree_censored(node, None)
+}
+
+/// Convert the syntax node to a `TokenTree` (what macro will consume)
+/// with the censored range excluded.
+pub fn syntax_node_to_token_tree_censored(
+    node: &SyntaxNode,
+    censor: Option<TextRange>,
+) -> (tt::Subtree, TokenMap) {
     let global_offset = node.text_range().start();
-    let mut c = Convertor::new(node, global_offset);
+    let mut c = Convertor::new(node, global_offset, censor);
     let subtree = convert_tokens(&mut c);
     c.id_alloc.map.shrink_to_fit();
     (subtree, c.id_alloc.map)
@@ -446,16 +457,24 @@ impl<'a> TokenConvertor for RawConvertor<'a> {
 struct Convertor {
     id_alloc: TokenIdAlloc,
     current: Option<SyntaxToken>,
+    censor: Option<TextRange>,
     range: TextRange,
     punct_offset: Option<(SyntaxToken, TextSize)>,
 }
 
 impl Convertor {
-    fn new(node: &SyntaxNode, global_offset: TextSize) -> Convertor {
+    fn new(node: &SyntaxNode, global_offset: TextSize, censor: Option<TextRange>) -> Convertor {
+        let first = node.first_token();
+        let current = match censor {
+            Some(censor) => iter::successors(first, |token| token.next_token())
+                .find(|token| !censor.contains_range(token.text_range())),
+            None => first,
+        };
         Convertor {
             id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } },
-            current: node.first_token(),
+            current,
             range: node.text_range(),
+            censor,
             punct_offset: None,
         }
     }
@@ -512,8 +531,11 @@ impl TokenConvertor for Convertor {
         if !&self.range.contains_range(curr.text_range()) {
             return None;
         }
-        self.current = curr.next_token();
-
+        self.current = match self.censor {
+            Some(censor) => iter::successors(curr.next_token(), |token| token.next_token())
+                .find(|token| !censor.contains_range(token.text_range())),
+            None => curr.next_token(),
+        };
         let token = if curr.kind().is_punct() {
             let range = curr.text_range();
             let range = TextRange::at(range.start(), TextSize::of('.'));

Review thread on the new `self.current` computation:

Member: Oh wow, I didn't realise that we already use `TextRange`s here. OK I guess, but I'd refactor that away some day -- I don't see a reason for us to rely on raw ranges here :)

Member Author: The `if !&self.range.contains_range(curr.text_range())` check seems unnecessary in the first place, as we get all tokens from the node that has that range here anyway.

I figured using a `TextRange` for the censor would be alright given we only want to censor one part of the node so far, which is simpler to handle than a slice of nodes.

Member: Yeah, I guess that's OK, although semantically I'd expect a hash set of nodes, which we can `.skip_subtree` over in `Preorder`.

Member Author: Looks like that check wasn't pointless after all; it makes sense that the token iteration leaves the node if it's not a root node.

Using preorder sounds nice compared to iterating the tokens via `next_token`, I'll adapt it to use that instead.

Member Author: Actually, using preorder seems non-trivial, since we are interested in the tokens in order but preorder only gives us the nodes. I'll leave this as is for now.
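Aside: a minimal sketch of the node-set alternative discussed in the thread above, for reference only; it is not part of this PR. It assumes the `preorder_with_tokens`/`skip_subtree` API that the `syntax` crate re-exports from rowan, and the function name `uncensored_tokens` is invented for illustration.

```rust
use rustc_hash::FxHashSet;
use syntax::{NodeOrToken, SyntaxNode, SyntaxToken, WalkEvent};

/// Collect the tokens of `root` in source order, skipping every subtree
/// rooted at a censored node.
fn uncensored_tokens(root: &SyntaxNode, censor: &FxHashSet<SyntaxNode>) -> Vec<SyntaxToken> {
    let mut tokens = Vec::new();
    let mut preorder = root.preorder_with_tokens();
    while let Some(event) = preorder.next() {
        match event {
            // Entering a censored node: skip it and everything below it.
            WalkEvent::Enter(NodeOrToken::Node(node)) if censor.contains(&node) => {
                preorder.skip_subtree()
            }
            // Tokens are leaves, so the walk still yields them in source
            // order, which addresses the tokens-in-order concern above.
            WalkEvent::Enter(NodeOrToken::Token(token)) => tokens.push(token),
            _ => (),
        }
    }
    tokens
}
```

Compared with a single `TextRange`, this censors whole subtrees without relying on raw ranges, at the cost of building the set up front.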
41 changes: 41 additions & 0 deletions crates/mbe/src/tests.rs
@@ -228,3 +228,44 @@ fn debug_dump_ignore_spaces(node: &syntax::SyntaxNode) -> String {
 
     buf
 }
+
+#[test]
+fn test_node_to_tt_censor() {
+    use syntax::ast::{AttrsOwner, ModuleItemOwner};
+
+    let source = r##"
+#[attr0]
+#[attr1]
+#[attr2]
+struct Struct {
+    field: ()
+}
+"##;
+    let source_file = ast::SourceFile::parse(&source).ok().unwrap();
+    let item = source_file.items().next().unwrap();
+    let attr = item.attrs().nth(1).unwrap();
+
+    let (tt, _) =
+        syntax_node_to_token_tree_censored(item.syntax(), Some(attr.syntax().text_range()));
+    expect_test::expect![[r##"# [attr0] # [attr2] struct Struct {field : ()}"##]]
+        .assert_eq(&tt.to_string());
+
+    let source = r##"
+#[derive(Derive0)]
+#[derive(Derive1)]
+#[derive(Derive2)]
+struct Struct {
+    field: ()
+}
+"##;
+    let source_file = ast::SourceFile::parse(&source).ok().unwrap();
+    let item = source_file.items().next().unwrap();
+    let attr = item.attrs().nth(1).unwrap();
+
+    let (tt, _) = syntax_node_to_token_tree_censored(
+        item.syntax(),
+        Some(attr.syntax().text_range().cover_offset(0.into())),
+    );
+    expect_test::expect![[r##"# [derive (Derive2)] struct Struct {field : ()}"##]]
+        .assert_eq(&tt.to_string());
+}