|
| 1 | +use crate::cursor; |
| 2 | +use crate::extractor::machine::{Machine, MachineState}; |
| 3 | +use crate::extractor::string_machine::StringMachine; |
| 4 | +use crate::extractor::CssVariableMachine; |
| 5 | + |
/// Byte-at-a-time state machine that recognises arbitrary properties such as `[color:red]` or
/// `[--my-color:red]`.
#[derive(Debug, Default)]
pub(crate) struct ArbitraryPropertyMachine {
    /// Position of the opening `[` of the candidate that is currently being parsed
    start_pos: usize,

    /// Bracket stack to ensure properly balanced brackets. For every bracket opened inside the
    /// value, the closing byte that is expected to match it is pushed.
    bracket_stack: Vec<u8>,

    /// Ignore the characters until this specific position
    skip_until_pos: Option<usize>,

    /// Current state of the machine
    state: State,

    /// Nested machine that is fed the bytes of a property name starting with `--`
    css_variable_machine: CssVariableMachine,

    /// Nested machine that is fed the bytes of a quoted string inside the value
    string_machine: StringMachine,
}
| 23 | + |
/// The phases `ArbitraryPropertyMachine` moves through while scanning a candidate.
#[derive(Debug, Default)]
enum State {
    /// Not inside a candidate; waiting for an opening `[`.
    #[default]
    Idle,

    /// Parsing the property, e.g.:
    ///
    /// ```text
    /// [color:red]
    ///  ^^^^^
    /// ```
    ParsingProperty,

    /// Parsing the property (which is a CSS variable), e.g.:
    ///
    /// ```text
    /// [--my-color:red]
    ///  ^^^^^^^^^^
    /// ```
    ParsingPropertyVariable,

    /// Parsing the value, e.g.:
    ///
    /// ```text
    /// [color:red]
    ///        ^^^
    /// ```
    ParsingValue,

    /// Parsing a string, in this case the brackets don't need to be balanced when inside of a
    /// string.
    ParsingString,
}
| 57 | + |
| 58 | +impl Machine for ArbitraryPropertyMachine { |
| 59 | + fn next(&mut self, cursor: &cursor::Cursor<'_>) -> MachineState { |
| 60 | + // Skipping characters until a specific position |
| 61 | + match self.skip_until_pos { |
| 62 | + Some(skip_until) if cursor.pos < skip_until => return MachineState::Parsing, |
| 63 | + Some(_) => self.skip_until_pos = None, |
| 64 | + None => {} |
| 65 | + } |
| 66 | + |
| 67 | + match self.state { |
| 68 | + State::Idle => match cursor.curr { |
| 69 | + // Start of an arbitrary property |
| 70 | + b'[' => { |
| 71 | + self.start_pos = cursor.pos; |
| 72 | + self.state = State::ParsingProperty; |
| 73 | + MachineState::Parsing |
| 74 | + } |
| 75 | + |
| 76 | + // Anything else is not a valid start of an arbitrary value |
| 77 | + _ => MachineState::Idle, |
| 78 | + }, |
| 79 | + |
| 80 | + State::ParsingProperty => match (cursor.curr, cursor.next) { |
| 81 | + // Start of a CSS variable |
| 82 | + (b'-', b'-') => { |
| 83 | + self.css_variable_machine.next(cursor); |
| 84 | + self.state = State::ParsingPropertyVariable; |
| 85 | + MachineState::Parsing |
| 86 | + } |
| 87 | + |
| 88 | + // Only alphanumeric characters and dashes are allowed |
| 89 | + (b'a'..=b'z' | b'A'..=b'Z' | b'-', _) => MachineState::Parsing, |
| 90 | + |
| 91 | + // End of the property name, but there must be at least a single character |
| 92 | + (b':', _) if cursor.pos > self.start_pos + 1 => { |
| 93 | + self.state = State::ParsingValue; |
| 94 | + MachineState::Parsing |
| 95 | + } |
| 96 | + |
| 97 | + // Anything else is not a valid property character |
| 98 | + _ => self.restart(), |
| 99 | + }, |
| 100 | + |
| 101 | + State::ParsingPropertyVariable => match self.css_variable_machine.next(cursor) { |
| 102 | + MachineState::Idle => self.restart(), |
| 103 | + MachineState::Parsing => MachineState::Parsing, |
| 104 | + MachineState::Done(_) => match cursor.next { |
| 105 | + // End of the CSS variable, must be followed by a `:` |
| 106 | + // |
| 107 | + // E.g.: `[--my-color:red]` |
| 108 | + // ^ |
| 109 | + b':' => { |
| 110 | + self.skip_until_pos = Some(cursor.pos + 2); |
| 111 | + self.state = State::ParsingValue; |
| 112 | + MachineState::Parsing |
| 113 | + } |
| 114 | + |
| 115 | + // Invalid arbitrary property |
| 116 | + _ => self.restart(), |
| 117 | + }, |
| 118 | + }, |
| 119 | + |
| 120 | + State::ParsingValue => match cursor.curr { |
| 121 | + // An escaped character, skip ahead to the next character |
| 122 | + b'\\' if !cursor.at_end => { |
| 123 | + self.skip_until_pos = Some(cursor.pos + 2); |
| 124 | + MachineState::Parsing |
| 125 | + } |
| 126 | + |
| 127 | + // An escaped whitespace character is not allowed |
| 128 | + b'\\' if cursor.next.is_ascii_whitespace() => self.restart(), |
| 129 | + |
| 130 | + b'(' => { |
| 131 | + self.bracket_stack.push(b')'); |
| 132 | + MachineState::Parsing |
| 133 | + } |
| 134 | + |
| 135 | + b'[' => { |
| 136 | + self.bracket_stack.push(b']'); |
| 137 | + MachineState::Parsing |
| 138 | + } |
| 139 | + |
| 140 | + b'{' => { |
| 141 | + self.bracket_stack.push(b'}'); |
| 142 | + MachineState::Parsing |
| 143 | + } |
| 144 | + |
| 145 | + b')' | b']' | b'}' if !self.bracket_stack.is_empty() => { |
| 146 | + if let Some(&expected) = self.bracket_stack.last() { |
| 147 | + if cursor.curr == expected { |
| 148 | + self.bracket_stack.pop(); |
| 149 | + } else { |
| 150 | + return self.restart(); |
| 151 | + } |
| 152 | + } |
| 153 | + |
| 154 | + MachineState::Parsing |
| 155 | + } |
| 156 | + |
| 157 | + // End of an arbitrary value |
| 158 | + // 1. All brackets must be balanced |
| 159 | + // 2. There must be at least a single character inside the brackets |
| 160 | + b']' if self.bracket_stack.is_empty() && self.start_pos + 1 != cursor.pos => { |
| 161 | + self.done(self.start_pos, cursor) |
| 162 | + } |
| 163 | + |
| 164 | + // Start of a string |
| 165 | + b'"' | b'\'' | b'`' => { |
| 166 | + self.string_machine.next(cursor); |
| 167 | + self.state = State::ParsingString; |
| 168 | + MachineState::Parsing |
| 169 | + } |
| 170 | + |
| 171 | + // Another `:` inside of an arbitrary property is only valid inside of a string or |
| 172 | + // inside of brackets. Everywhere else, it's invalid. |
| 173 | + // |
| 174 | + // E.g.: `[color:red:blue]` |
| 175 | + // ^ Not valid |
| 176 | + // E.g.: `[background:url(https://example.com)]` |
| 177 | + // ^ Valid |
| 178 | + // E.g.: `[content:'a:b:c:']` |
| 179 | + // ^ ^ ^ Valid |
| 180 | + b':' if self.bracket_stack.is_empty() => self.restart(), |
| 181 | + |
| 182 | + // Any kind of whitespace is not allowed |
| 183 | + x if x.is_ascii_whitespace() => self.restart(), |
| 184 | + |
| 185 | + // Everything else is valid |
| 186 | + _ => MachineState::Parsing, |
| 187 | + }, |
| 188 | + |
| 189 | + State::ParsingString => match self.string_machine.next(cursor) { |
| 190 | + MachineState::Idle => self.restart(), |
| 191 | + MachineState::Parsing => MachineState::Parsing, |
| 192 | + MachineState::Done(_) => { |
| 193 | + self.state = State::ParsingProperty; |
| 194 | + MachineState::Parsing |
| 195 | + } |
| 196 | + }, |
| 197 | + } |
| 198 | + } |
| 199 | +} |
| 200 | + |
#[cfg(test)]
mod tests {
    use super::ArbitraryPropertyMachine;
    use crate::cursor::Cursor;
    use crate::extractor::machine::{Machine, MachineState};

    /// Drives a fresh machine over every byte of each input and checks which candidates it
    /// reports as done.
    #[test]
    fn test_arbitrary_property_extraction() {
        for (input, expected) in [
            // Simple arbitrary property
            ("[color:red]", vec!["[color:red]"]),
            // Name with dashes
            ("[background-color:red]", vec!["[background-color:red]"]),
            // Name with leading `-` is valid
            ("[-webkit-value:red]", vec!["[-webkit-value:red]"]),
            // Setting a CSS Variable
            ("[--my-color:red]", vec!["[--my-color:red]"]),
            // --------------------------------------------------------

            // Invalid CSS Variable
            ("[--my#color:red]", vec![]),
            // Spaces are not allowed
            ("[color: red]", vec![]),
            // Multiple colons are not allowed
            ("[color:red:blue]", vec![]),
            // Underscores are not allowed in the property name
            ("[background_color:red]", vec![]),
            // A colon is required
            ("[red]", vec![]),
            // The property cannot be empty
            ("[:red]", vec![]),
            // Empty brackets are not allowed
            ("[]", vec![]),
            // Missing colon in more complex example
            (r#"[CssClass("gap-y-4")]"#, vec![]),
        ] {
            let mut machine = ArbitraryPropertyMachine::default();
            let mut cursor = Cursor::new(input.as_bytes());

            let mut actual: Vec<&str> = vec![];

            for i in 0..input.len() {
                cursor.move_to(i);

                if let MachineState::Done(span) = machine.next(&cursor) {
                    // Checked conversion: a test has no reason to bypass UTF-8 validation.
                    let candidate = std::str::from_utf8(span.slice(cursor.input))
                        .expect("extracted candidate should be valid UTF-8");
                    actual.push(candidate);
                }
            }

            assert_eq!(actual, expected, "input: {input}");
        }
    }
}
0 commit comments