13
13
/** @internal */
14
14
final class Parser
15
15
{
16
- private const PATTERN_DATETIME = '#\d\d\d\d-\d\d?-\d\d?(?:(?:[Tt]| ++)\d\d?:\d\d:\d\d(?:\.\d*+)? *+(?:Z|[-+]\d\d?(?::?\d\d)?)?)?$#DA ' ;
17
- private const PATTERN_HEX = '#0x[0-9a-fA-F]++$#DA ' ;
18
- private const PATTERN_OCTAL = '#0o[0-7]++$#DA ' ;
19
- private const PATTERN_BINARY = '#0b[0-1]++$#DA ' ;
20
-
21
- private const SIMPLE_TYPES = [
22
- 'true ' => true , 'True ' => true , 'TRUE ' => true , 'yes ' => true , 'Yes ' => true , 'YES ' => true , 'on ' => true , 'On ' => true , 'ON ' => true ,
23
- 'false ' => false , 'False ' => false , 'FALSE ' => false , 'no ' => false , 'No ' => false , 'NO ' => false , 'off ' => false , 'Off ' => false , 'OFF ' => false ,
24
- 'null ' => null , 'Null ' => null , 'NULL ' => null ,
25
- ];
26
-
27
- private const DEPRECATED_TYPES = ['on ' => 1 , 'On ' => 1 , 'ON ' => 1 , 'off ' => 1 , 'Off ' => 1 , 'OFF ' => 1 ];
28
-
29
- private const ESCAPE_SEQUENCES = [
30
- 't ' => "\t" , 'n ' => "\n" , 'r ' => "\r" , 'f ' => "\x0C" , 'b ' => "\x08" , '" ' => '" ' , '\\' => '\\' , '/ ' => '/ ' , '_ ' => "\u{A0}" ,
31
- ];
32
-
33
16
/** @var TokenStream */
34
17
private $ tokens ;
35
18
@@ -133,11 +116,15 @@ private function parseBlock(string $indent, bool $onlyBullets = false): Node
133
116
private function parseValue (): Node
134
117
{
135
118
if ($ token = $ this ->tokens ->consume (Token::STRING )) {
136
- $ node = new Node \StringNode ($ this ->decodeString ($ token ->value ), $ this ->tokens ->getPos () - 1 );
119
+ try {
120
+ $ node = new Node \StringNode (Node \StringNode::parse ($ token ->value ), $ this ->tokens ->getPos () - 1 );
121
+ } catch (Exception $ e ) {
122
+ $ this ->tokens ->error ($ e ->getMessage (), $ this ->tokens ->getPos () - 1 );
123
+ }
137
124
138
125
} elseif ($ token = $ this ->tokens ->consume (Token::LITERAL )) {
139
126
$ pos = $ this ->tokens ->getPos () - 1 ;
140
- $ node = new Node \LiteralNode ($ this -> literalToValue ($ token ->value , $ this ->tokens ->isNext (': ' , '= ' )), $ pos );
127
+ $ node = new Node \LiteralNode (Node \LiteralNode:: parse ($ token ->value , $ this ->tokens ->isNext (': ' , '= ' )), $ pos );
141
128
142
129
} elseif ($ this ->tokens ->isNext ('[ ' , '( ' , '{ ' )) {
143
130
$ node = $ this ->parseBraces ();
@@ -159,7 +146,7 @@ private function parseEntity(Node $node): Node
159
146
$ entities [] = new Node \EntityNode ($ node , $ attributes ->items , $ node ->startPos , $ attributes ->endPos );
160
147
161
148
while ($ token = $ this ->tokens ->consume (Token::LITERAL )) {
162
- $ valueNode = new Node \LiteralNode ($ this -> literalToValue ($ token ->value ), $ this ->tokens ->getPos () - 1 );
149
+ $ valueNode = new Node \LiteralNode (Node \LiteralNode:: parse ($ token ->value ), $ this ->tokens ->getPos () - 1 );
163
150
if ($ this ->tokens ->isNext ('( ' )) {
164
151
$ attributes = $ this ->parseBraces ();
165
152
$ entities [] = new Node \EntityNode ($ valueNode , $ attributes ->items , $ valueNode ->startPos , $ attributes ->endPos );
@@ -213,41 +200,6 @@ private function parseBraces(): Node\ArrayNode
213
200
}
214
201
215
202
216
- private function decodeString (string $ s ): string
217
- {
218
- if (preg_match ('#^...\n++([\t ]*+)# ' , $ s , $ m )) { // multiline
219
- $ res = substr ($ s , 3 , -3 );
220
- $ res = str_replace ("\n" . $ m [1 ], "\n" , $ res );
221
- $ res = preg_replace ('#^\n|\n[\t ]*+$#D ' , '' , $ res );
222
- } else {
223
- $ res = substr ($ s , 1 , -1 );
224
- if ($ s [0 ] === "' " ) {
225
- $ res = str_replace ("'' " , "' " , $ res );
226
- }
227
- }
228
- if ($ s [0 ] === '" ' ) {
229
- $ res = preg_replace_callback (
230
- '# \\\\(?:ud[89ab][0-9a-f]{2} \\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i ' ,
231
- function (array $ m ): string {
232
- $ sq = $ m [0 ];
233
- if (isset (self ::ESCAPE_SEQUENCES [$ sq [1 ]])) {
234
- return self ::ESCAPE_SEQUENCES [$ sq [1 ]];
235
- } elseif ($ sq [1 ] === 'u ' && strlen ($ sq ) >= 6 ) {
236
- return json_decode ('" ' . $ sq . '" ' ) ?? $ this ->tokens ->error ("Invalid UTF-8 sequence $ sq " , $ this ->tokens ->getPos () - 1 );
237
- } elseif ($ sq [1 ] === 'x ' && strlen ($ sq ) === 4 ) {
238
- trigger_error ("Neon: ' $ sq' is deprecated, use ' \\uXXXX' instead. " , E_USER_DEPRECATED );
239
- return chr (hexdec (substr ($ sq , 2 )));
240
- } else {
241
- $ this ->tokens ->error ("Invalid escaping sequence $ sq " , $ this ->tokens ->getPos () - 1 );
242
- }
243
- },
244
- $ res
245
- );
246
- }
247
- return $ res ;
248
- }
249
-
250
-
251
203
private function checkArrayKey (Node $ key , array &$ arr ): void
252
204
{
253
205
if ((!$ key instanceof Node \StringNode && !$ key instanceof Node \LiteralNode) || !is_scalar ($ key ->value )) {
@@ -259,34 +211,4 @@ private function checkArrayKey(Node $key, array &$arr): void
259
211
}
260
212
$ arr [$ k ] = true ;
261
213
}
262
-
263
-
264
- /** @return mixed */
265
- public function literalToValue (string $ value , bool $ isKey = false )
266
- {
267
- if (!$ isKey && array_key_exists ($ value , self ::SIMPLE_TYPES )) {
268
- if (isset (self ::DEPRECATED_TYPES [$ value ])) {
269
- trigger_error ("Neon: keyword ' $ value' is deprecated, use true/yes or false/no. " , E_USER_DEPRECATED );
270
- }
271
- return self ::SIMPLE_TYPES [$ value ];
272
-
273
- } elseif (is_numeric ($ value )) {
274
- return $ value * 1 ;
275
-
276
- } elseif (preg_match (self ::PATTERN_HEX , $ value )) {
277
- return hexdec ($ value );
278
-
279
- } elseif (preg_match (self ::PATTERN_OCTAL , $ value )) {
280
- return octdec ($ value );
281
-
282
- } elseif (preg_match (self ::PATTERN_BINARY , $ value )) {
283
- return bindec ($ value );
284
-
285
- } elseif (!$ isKey && preg_match (self ::PATTERN_DATETIME , $ value )) {
286
- return new \DateTimeImmutable ($ value );
287
-
288
- } else {
289
- return $ value ;
290
- }
291
- }
292
214
}
0 commit comments