Skip to content

Commit e2222c9

Browse files
committed
WIP
1 parent e1d79c7 commit e2222c9

16 files changed

+2504
-22
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
use crate::cursor;
2+
use crate::extractor::machine::{Machine, MachineState};
3+
use crate::extractor::string_machine::StringMachine;
4+
use crate::extractor::CssVariableMachine;
5+
6+
#[derive(Debug, Default)]
7+
pub(crate) struct ArbitraryPropertyMachine {
8+
/// Start position of the arbitrary value
9+
start_pos: usize,
10+
11+
/// Bracket stack to ensure properly balanced brackets
12+
bracket_stack: Vec<u8>,
13+
14+
/// Ignore the characters until this specific position
15+
skip_until_pos: Option<usize>,
16+
17+
/// Current state of the machine
18+
state: State,
19+
20+
css_variable_machine: CssVariableMachine,
21+
string_machine: StringMachine,
22+
}
23+
24+
/// The phases an [`ArbitraryPropertyMachine`] moves through while parsing a candidate.
#[derive(Default, Debug)]
enum State {
    /// Not inside of an arbitrary property; waiting for an opening `[`.
    #[default]
    Idle,

    /// Parsing the property name, e.g.:
    ///
    /// ```text
    /// [color:red]
    ///  ^^^^^
    /// ```
    ParsingProperty,

    /// Parsing a property name that is a CSS variable, e.g.:
    ///
    /// ```text
    /// [--my-color:red]
    ///  ^^^^^^^^^^
    /// ```
    ParsingPropertyVariable,

    /// Parsing the value that follows the `:`, e.g.:
    ///
    /// ```text
    /// [color:red]
    ///        ^^^
    /// ```
    ParsingValue,

    /// Parsing a quoted string inside of the value. Brackets do not have to be balanced
    /// while inside of a string.
    ParsingString,
}
57+
58+
impl Machine for ArbitraryPropertyMachine {
59+
fn next(&mut self, cursor: &cursor::Cursor<'_>) -> MachineState {
60+
// Skipping characters until a specific position
61+
match self.skip_until_pos {
62+
Some(skip_until) if cursor.pos < skip_until => return MachineState::Parsing,
63+
Some(_) => self.skip_until_pos = None,
64+
None => {}
65+
}
66+
67+
match self.state {
68+
State::Idle => match cursor.curr {
69+
// Start of an arbitrary property
70+
b'[' => {
71+
self.start_pos = cursor.pos;
72+
self.state = State::ParsingProperty;
73+
MachineState::Parsing
74+
}
75+
76+
// Anything else is not a valid start of an arbitrary value
77+
_ => MachineState::Idle,
78+
},
79+
80+
State::ParsingProperty => match (cursor.curr, cursor.next) {
81+
// Start of a CSS variable
82+
(b'-', b'-') => {
83+
self.css_variable_machine.next(cursor);
84+
self.state = State::ParsingPropertyVariable;
85+
MachineState::Parsing
86+
}
87+
88+
// Only alphanumeric characters and dashes are allowed
89+
(b'a'..=b'z' | b'A'..=b'Z' | b'-', _) => MachineState::Parsing,
90+
91+
// End of the property name, but there must be at least a single character
92+
(b':', _) if cursor.pos > self.start_pos + 1 => {
93+
self.state = State::ParsingValue;
94+
MachineState::Parsing
95+
}
96+
97+
// Anything else is not a valid property character
98+
_ => self.restart(),
99+
},
100+
101+
State::ParsingPropertyVariable => match self.css_variable_machine.next(cursor) {
102+
MachineState::Idle => self.restart(),
103+
MachineState::Parsing => MachineState::Parsing,
104+
MachineState::Done(_) => match cursor.next {
105+
// End of the CSS variable, must be followed by a `:`
106+
//
107+
// E.g.: `[--my-color:red]`
108+
// ^
109+
b':' => {
110+
self.skip_until_pos = Some(cursor.pos + 2);
111+
self.state = State::ParsingValue;
112+
MachineState::Parsing
113+
}
114+
115+
// Invalid arbitrary property
116+
_ => self.restart(),
117+
},
118+
},
119+
120+
State::ParsingValue => match cursor.curr {
121+
// An escaped character, skip ahead to the next character
122+
b'\\' if !cursor.at_end => {
123+
self.skip_until_pos = Some(cursor.pos + 2);
124+
MachineState::Parsing
125+
}
126+
127+
// An escaped whitespace character is not allowed
128+
b'\\' if cursor.next.is_ascii_whitespace() => self.restart(),
129+
130+
b'(' => {
131+
self.bracket_stack.push(b')');
132+
MachineState::Parsing
133+
}
134+
135+
b'[' => {
136+
self.bracket_stack.push(b']');
137+
MachineState::Parsing
138+
}
139+
140+
b'{' => {
141+
self.bracket_stack.push(b'}');
142+
MachineState::Parsing
143+
}
144+
145+
b')' | b']' | b'}' if !self.bracket_stack.is_empty() => {
146+
if let Some(&expected) = self.bracket_stack.last() {
147+
if cursor.curr == expected {
148+
self.bracket_stack.pop();
149+
} else {
150+
return self.restart();
151+
}
152+
}
153+
154+
MachineState::Parsing
155+
}
156+
157+
// End of an arbitrary value
158+
// 1. All brackets must be balanced
159+
// 2. There must be at least a single character inside the brackets
160+
b']' if self.bracket_stack.is_empty() && self.start_pos + 1 != cursor.pos => {
161+
self.done(self.start_pos, cursor)
162+
}
163+
164+
// Start of a string
165+
b'"' | b'\'' | b'`' => {
166+
self.string_machine.next(cursor);
167+
self.state = State::ParsingString;
168+
MachineState::Parsing
169+
}
170+
171+
// Another `:` inside of an arbitrary property is only valid inside of a string or
172+
// inside of brackets. Everywhere else, it's invalid.
173+
//
174+
// E.g.: `[color:red:blue]`
175+
// ^ Not valid
176+
// E.g.: `[background:url(https://example.com)]`
177+
// ^ Valid
178+
// E.g.: `[content:'a:b:c:']`
179+
// ^ ^ ^ Valid
180+
b':' if self.bracket_stack.is_empty() => self.restart(),
181+
182+
// Any kind of whitespace is not allowed
183+
x if x.is_ascii_whitespace() => self.restart(),
184+
185+
// Everything else is valid
186+
_ => MachineState::Parsing,
187+
},
188+
189+
State::ParsingString => match self.string_machine.next(cursor) {
190+
MachineState::Idle => self.restart(),
191+
MachineState::Parsing => MachineState::Parsing,
192+
MachineState::Done(_) => {
193+
self.state = State::ParsingProperty;
194+
MachineState::Parsing
195+
}
196+
},
197+
}
198+
}
199+
}
200+
201+
#[cfg(test)]
mod tests {
    use super::ArbitraryPropertyMachine;
    use crate::cursor::Cursor;
    use crate::extractor::machine::{Machine, MachineState};

    /// Drives the machine over each input byte by byte and compares every completed
    /// candidate against the expected list.
    #[test]
    fn test_arbitrary_property_extraction() {
        for (input, expected) in [
            // Simple arbitrary property
            ("[color:red]", vec!["[color:red]"]),
            // Name with dashes
            ("[background-color:red]", vec!["[background-color:red]"]),
            // Name with leading `-` is valid
            ("[-webkit-value:red]", vec!["[-webkit-value:red]"]),
            // Setting a CSS Variable
            ("[--my-color:red]", vec!["[--my-color:red]"]),
            // --------------------------------------------------------

            // Invalid CSS Variable
            ("[--my#color:red]", vec![]),
            // Spaces are not allowed
            ("[color: red]", vec![]),
            // Multiple colons are not allowed
            ("[color:red:blue]", vec![]),
            // Only ASCII letters and dashes are allowed in the property name
            ("[background_color:red]", vec![]),
            // A colon is required
            ("[red]", vec![]),
            // The property cannot be empty
            ("[:red]", vec![]),
            // Empty brackets are not allowed
            ("[]", vec![]),
            // Missing colon in more complex example
            (r#"[CssClass("gap-y-4")]"#, vec![]),
        ] {
            let mut machine = ArbitraryPropertyMachine::default();
            let mut cursor = Cursor::new(input.as_bytes());

            let mut actual: Vec<&str> = vec![];

            for i in 0..input.len() {
                cursor.move_to(i);

                if let MachineState::Done(span) = machine.next(&cursor) {
                    // Checked conversion: a span that does not fall on UTF-8 boundaries
                    // should fail the test loudly instead of being undefined behaviour.
                    actual.push(
                        std::str::from_utf8(span.slice(cursor.input))
                            .expect("extracted span must be valid UTF-8"),
                    );
                }
            }

            assert_eq!(actual, expected);
        }
    }
}

0 commit comments

Comments
 (0)