Skip to content

Commit 6b2b821

Browse files
JanTvrdik authored and dg committed
NeonDecoder: implemented support for unicode surrogate pairs
1 parent eb8a490 commit 6b2b821

File tree

3 files changed

+17
-3
lines changed

3 files changed

+17
-3
lines changed

src/Neon/Decoder.php

+9 -3
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ private function parse($indent, $result = NULL, $key = NULL, $hasKey = FALSE)
240240
'false' => FALSE, 'False' => FALSE, 'FALSE' => FALSE, 'no' => FALSE, 'No' => FALSE, 'NO' => FALSE, 'off' => FALSE, 'Off' => FALSE, 'OFF' => FALSE,
241241
);
242242
if ($t[0] === '"') {
243-
$value = preg_replace_callback('#\\\\(?:u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i', array($this, 'cbString'), substr($t, 1, -1));
243+
$value = preg_replace_callback('#\\\\(?:ud[89ab][0-9a-f]{2}\\\\ud[c-f][0-9a-f]{2}|u[0-9a-f]{4}|x[0-9a-f]{2}|.)#i', array($this, 'cbString'), substr($t, 1, -1));
244244
} elseif ($t[0] === "'") {
245245
$value = substr($t, 1, -1);
246246
} elseif (isset($consts[$t]) && (!isset($tokens[$n+1][0]) || ($tokens[$n+1][0] !== ':' && $tokens[$n+1][0] !== '='))) {
@@ -296,8 +296,14 @@ private function cbString($m)
296296
$sq = $m[0];
297297
if (isset($mapping[$sq[1]])) {
298298
return $mapping[$sq[1]];
299-
} elseif ($sq[1] === 'u' && strlen($sq) === 6) {
300-
return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', hexdec(substr($sq, 2))));
299+
} elseif ($sq[1] === 'u' && strlen($sq) >= 6) {
300+
$lead = hexdec(substr($sq, 2, 4));
301+
$tail = hexdec(substr($sq, 8, 4));
302+
$code = $tail ? (0x2400 + (($lead - 0xD800) << 10) + $tail) : $lead;
303+
if ($code >= 0xD800 && $code <= 0xDFFF) {
304+
$this->error("Invalid UTF-8 (lone surrogate) $sq");
305+
}
306+
return iconv('UTF-32BE', 'UTF-8//IGNORE', pack('N', $code));
301307
} elseif ($sq[1] === 'x' && strlen($sq) === 4) {
302308
return chr(hexdec(substr($sq, 2)));
303309
} else {

tests/Neon/Decoder.errors.phpt

+5
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ Assert::exception(function() {
1616
}, 'Nette\Neon\Exception', "Unexpected 'World' on line 2, column 1." );
1717

1818

19+
Assert::exception(function() {
20+
Neon::decode('"\uD801"');
21+
}, 'Nette\Neon\Exception', "Invalid UTF-8 (lone surrogate) \\uD801 on line 1, column 1." );
22+
23+
1924
Assert::exception(function() {
2025
Neon::decode("- Dave,\n- Rimmer,\n- Kryten,\n");
2126
}, 'Nette\Neon\Exception', "Unexpected ',' on line 1, column 7." );

tests/Neon/Decoder.scalar.phpt

+3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ Assert::same( 'the"string', Neon::decode('the"string #literal') );
2525
Assert::same( "the'string #literal", Neon::decode('"the\'string #literal"') );
2626
Assert::same( 'the"string #literal', Neon::decode("'the\"string #literal'") );
2727
Assert::same( 'the"string #literal', Neon::decode('"the\\"string #literal"') );
28+
Assert::same( '@', Neon::decode('"\u0040"') );
29+
Assert::same( "\xC4\x9B", Neon::decode('"\u011B"') );
30+
Assert::same( "\xf0\x90\x90\x81", Neon::decode('"\uD801\uDC01"') ); // U+10401 encoded as surrogate pair
2831
Assert::same( '<literal> <literal>', Neon::decode('<literal> <literal>') );
2932
Assert::same( "", Neon::decode("''") );
3033
Assert::same( "", Neon::decode('""') );

0 commit comments

Comments
 (0)