@@ -22,16 +22,16 @@ use self::states::{Rawtext, Rcdata, ScriptData, ScriptDataEscaped};
22
22
use self :: char_ref:: { CharRef , CharRefTokenizer } ;
23
23
24
24
use crate :: util:: str:: lower_ascii_letter;
25
-
26
25
use log:: { debug, trace} ;
27
26
use mac:: format_if;
28
- use markup5ever:: { namespace_url, ns, small_char_set} ;
27
+ use markup5ever:: buffer_queue:: BufferQueue ;
28
+ use markup5ever:: { namespace_url, ns, small_char_set, InputSink , InputSinkResult } ;
29
29
use std:: borrow:: Cow :: { self , Borrowed } ;
30
30
use std:: cell:: { Cell , RefCell , RefMut } ;
31
31
use std:: collections:: BTreeMap ;
32
- use std:: mem;
32
+ use std:: { iter , mem} ;
33
33
34
- pub use crate :: buffer_queue:: { BufferQueue , FromSet , NotFromSet , SetResult } ;
34
+ pub use crate :: buffer_queue:: { FromSet , NotFromSet , SetResult } ;
35
35
use crate :: tendril:: StrTendril ;
36
36
use crate :: { Attribute , LocalName , QualName , SmallCharSet } ;
37
37
@@ -43,13 +43,17 @@ pub enum ProcessResult<Handle> {
43
43
Continue ,
44
44
Suspend ,
45
45
Script ( Handle ) ,
46
+ #[ cfg( feature = "encoding" ) ]
47
+ MaybeChangeEncodingAndStartOver ( & ' static encoding_rs:: Encoding ) ,
46
48
}
47
49
48
50
/// Outcome reported to the caller after the tokenizer has been fed input.
///
/// The type is `#[must_use]`: silently dropping it would lose a pending
/// script handle or encoding-change request.
#[must_use]
#[derive(Debug)]
pub enum TokenizerResult<Handle> {
    /// No special action was requested while processing the input.
    Done,
    /// A processing step produced `ProcessResult::Script`; the handle it
    /// carried is passed through unchanged.
    // NOTE(review): presumably the script node the caller must run before
    // feeding more input — confirm against the `TokenSink` documentation.
    Script(Handle),
    /// A processing step asked to possibly switch to the given encoding and
    /// start over. Only present when the `encoding` feature is enabled.
    #[cfg(feature = "encoding")]
    MaybeChangeEncodingAndStartOver(&'static encoding_rs::Encoding),
}
54
58
55
59
fn option_push ( opt_str : & mut Option < StrTendril > , c : char ) {
@@ -364,6 +368,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
364
368
ProcessResult :: Continue => ( ) ,
365
369
ProcessResult :: Suspend => break ,
366
370
ProcessResult :: Script ( node) => return TokenizerResult :: Script ( node) ,
371
+ #[ cfg( feature = "encoding" ) ]
372
+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
373
+ return TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding)
374
+ } ,
367
375
}
368
376
}
369
377
} else {
@@ -372,6 +380,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
372
380
ProcessResult :: Continue => ( ) ,
373
381
ProcessResult :: Suspend => break ,
374
382
ProcessResult :: Script ( node) => return TokenizerResult :: Script ( node) ,
383
+ #[ cfg( feature = "encoding" ) ]
384
+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
385
+ return TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding)
386
+ } ,
375
387
}
376
388
}
377
389
}
@@ -452,6 +464,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
452
464
self . state . set ( states:: RawData ( kind) ) ;
453
465
ProcessResult :: Continue
454
466
} ,
467
+ #[ cfg( feature = "encoding" ) ]
468
+ TokenSinkResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
469
+ ProcessResult :: MaybeChangeEncodingAndStartOver ( encoding)
470
+ } ,
455
471
}
456
472
}
457
473
@@ -1455,6 +1471,8 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
1455
1471
ProcessResult :: Continue => ( ) ,
1456
1472
ProcessResult :: Suspend => break ,
1457
1473
ProcessResult :: Script ( _) => unreachable ! ( ) ,
1474
+ #[ cfg( feature = "encoding" ) ]
1475
+ ProcessResult :: MaybeChangeEncodingAndStartOver ( _) => unreachable ! ( ) ,
1458
1476
}
1459
1477
}
1460
1478
@@ -1582,13 +1600,36 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
1582
1600
}
1583
1601
}
1584
1602
1603
+ impl < Sink > InputSink for Tokenizer < Sink >
1604
+ where
1605
+ Sink : TokenSink ,
1606
+ {
1607
+ type Handle = Sink :: Handle ;
1608
+
1609
+ fn feed ( & self , input : & BufferQueue ) -> impl Iterator < Item = InputSinkResult < Self :: Handle > > {
1610
+ iter:: from_fn ( || self . feed ( input) . into ( ) )
1611
+ }
1612
+ }
1613
+
1614
+ impl < Handle > From < TokenizerResult < Handle > > for Option < InputSinkResult < Handle > > {
1615
+ fn from ( value : TokenizerResult < Handle > ) -> Self {
1616
+ match value {
1617
+ TokenizerResult :: Script ( handle) => Some ( InputSinkResult :: HandleScript ( handle) ) ,
1618
+ TokenizerResult :: MaybeChangeEncodingAndStartOver ( encoding) => {
1619
+ Some ( InputSinkResult :: MaybeStartOverWithEncoding ( encoding) )
1620
+ } ,
1621
+ TokenizerResult :: Done => None ,
1622
+ }
1623
+ }
1624
+ }
1625
+
1585
1626
#[ cfg( test) ]
1586
1627
#[ allow( non_snake_case) ]
1587
1628
mod test {
1588
1629
use super :: option_push; // private items
1589
- use crate :: tendril:: { SliceExt , StrTendril } ;
1590
-
1591
1630
use super :: { TokenSink , TokenSinkResult , Tokenizer , TokenizerOpts } ;
1631
+ use crate :: tendril:: { SliceExt , StrTendril } ;
1632
+ use crate :: LocalName ;
1592
1633
1593
1634
use super :: interface:: { CharacterTokens , EOFToken , NullCharacterToken , ParseError } ;
1594
1635
use super :: interface:: { EndTag , StartTag , Tag , TagKind } ;
@@ -1597,8 +1638,6 @@ mod test {
1597
1638
use markup5ever:: buffer_queue:: BufferQueue ;
1598
1639
use std:: cell:: RefCell ;
1599
1640
1600
- use crate :: LocalName ;
1601
-
1602
1641
// LinesMatch implements the TokenSink trait. It is used for testing to see
1603
1642
// if current_line is being updated when process_token is called. The lines
1604
1643
// vector is a collection of the line numbers that each token is on.
0 commit comments