@@ -191,30 +191,46 @@ pub enum DocStyle {
191191/// `rustc_ast::ast::LitKind`).
192192#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
193193pub enum LiteralKind {
194- /// " 12_u8", " 0o100", " 0b120i99", " 1f32" .
194+ /// ` 12_u8`, ` 0o100`, ` 0b120i99`, ` 1f32` .
195195 Int { base : Base , empty_int : bool } ,
196- /// " 12.34f32", " 1e3" , but not " 1f32" .
196+ /// ` 12.34f32`, ` 1e3` , but not ` 1f32` .
197197 Float { base : Base , empty_exponent : bool } ,
198- /// " 'a'", " '\\'", " '''", "';"
198+ /// ` 'a'`, ` '\\'`, ` '''`, `';`
199199 Char { terminated : bool } ,
200- /// " b'a'", " b'\\'", " b'''", " b';"
200+ /// ` b'a'`, ` b'\\'`, ` b'''`, ` b';`
201201 Byte { terminated : bool } ,
202- /// "" abc"", "" abc"
202+ /// `" abc"`, `" abc`
203203 Str { terminated : bool } ,
204- /// " b"abc"", " b"abc"
204+ /// ` b"abc"`, ` b"abc`
205205 ByteStr { terminated : bool } ,
206206 /// `c"abc"`, `c"abc`
207207 CStr { terminated : bool } ,
208- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
208+ /// Guarded string literal prefix: `#"` or `##`.
209+ ///
210+ /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
211+ /// Split into the component tokens on older editions.
212+ GuardedStrPrefix ,
213+ /// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
209214 /// an invalid literal.
210215 RawStr { n_hashes : Option < u8 > } ,
211- /// " br"abc"", " br#"abc"#", " br####"ab"###"c"####", " br#"a" . `None`
216+ /// ` br"abc"`, ` br#"abc"#`, ` br####"ab"###"c"####`, ` br#"a` . `None`
212217 /// indicates an invalid literal.
213218 RawByteStr { n_hashes : Option < u8 > } ,
214219 /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
215220 RawCStr { n_hashes : Option < u8 > } ,
216221}
217222
/// Result of lexing a guarded string such as `#"abc"#`.
///
/// The closing side is allowed to be incomplete: `##"a"` (fewer closing
/// hashes than opening ones) and `#"a` (no closing quote at all) are both
/// still reported through this type. Accepting those forms keeps lexing
/// cheap and gives better diagnostics for code written before the syntax
/// was reserved.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct GuardedStr {
    /// Number of `#` characters counted before the opening `"`.
    pub n_hashes: u32,
    /// Whether the inner double-quoted string was found to be terminated.
    pub terminated: bool,
    /// Position within the token recorded when lexing of the guarded
    /// string stopped.
    pub token_len: u32,
}
233+
218234#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
219235pub enum RawStrError {
220236 /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
@@ -403,6 +419,13 @@ impl Cursor<'_> {
403419 TokenKind :: Literal { kind : literal_kind, suffix_start }
404420 }
405421
422+ // Guarded string literal prefix: `#"` or `##`
423+ '#' if matches ! ( self . first( ) , '"' | '#' ) => {
424+ self . bump ( ) ;
425+ let suffix_start = self . pos_within_token ( ) ;
426+ TokenKind :: Literal { kind : GuardedStrPrefix , suffix_start }
427+ }
428+
406429 // One-symbol tokens.
407430 ';' => Semi ,
408431 ',' => Comma ,
@@ -780,6 +803,60 @@ impl Cursor<'_> {
780803 false
781804 }
782805
806+ /// Attempt to lex for a guarded string literal.
807+ ///
808+ /// Used by `rustc_parse::lexer` to lex for guarded strings
809+ /// conditionally based on edition.
810+ ///
811+ /// Note: this will not reset the `Cursor` when a
812+ /// guarded string is not found. It is the caller's
813+ /// responsibility to do so.
814+ pub fn guarded_double_quoted_string ( & mut self ) -> Option < GuardedStr > {
815+ debug_assert ! ( self . prev( ) != '#' ) ;
816+
817+ let mut n_start_hashes: u32 = 0 ;
818+ while self . first ( ) == '#' {
819+ n_start_hashes += 1 ;
820+ self . bump ( ) ;
821+ }
822+
823+ if self . first ( ) != '"' {
824+ return None ;
825+ }
826+ self . bump ( ) ;
827+ debug_assert ! ( self . prev( ) == '"' ) ;
828+
829+ // Lex the string itself as a normal string literal
830+ // so we can recover that for older editions later.
831+ let terminated = self . double_quoted_string ( ) ;
832+ if !terminated {
833+ let token_len = self . pos_within_token ( ) ;
834+ self . reset_pos_within_token ( ) ;
835+
836+ return Some ( GuardedStr { n_hashes : n_start_hashes, terminated : false , token_len } ) ;
837+ }
838+
839+ // Consume closing '#' symbols.
840+ // Note that this will not consume extra trailing `#` characters:
841+ // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
842+ // followed by a `#` token.
843+ let mut n_end_hashes = 0 ;
844+ while self . first ( ) == '#' && n_end_hashes < n_start_hashes {
845+ n_end_hashes += 1 ;
846+ self . bump ( ) ;
847+ }
848+
849+ // Reserved syntax, always an error, so it doesn't matter if
850+ // `n_start_hashes != n_end_hashes`.
851+
852+ self . eat_literal_suffix ( ) ;
853+
854+ let token_len = self . pos_within_token ( ) ;
855+ self . reset_pos_within_token ( ) ;
856+
857+ Some ( GuardedStr { n_hashes : n_start_hashes, terminated : true , token_len } )
858+ }
859+
783860 /// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
784861 fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
785862 // Wrap the actual function to handle the error with too many hashes.
0 commit comments