Skip to content

Commit 372d945

Browse files
JanTvrdik authored and dg committed
optimize Lexer performance (#65)
1 parent b191a35 commit 372d945

File tree

1 file changed

+7
-16
lines changed

1 file changed

+7
-16
lines changed

src/Neon/Lexer.php

+7-16
Original file line number | Diff line number | Diff line change
@@ -50,28 +50,19 @@ public function tokenize(string $input): TokenStream
5050
{
5151
$input = str_replace("\r", '', $input);
5252
$pattern = '~(' . implode(')|(', self::Patterns) . ')~Amixu';
53-
$res = preg_match_all($pattern, $input, $tokens, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
53+
$res = preg_match_all($pattern, $input, $matches, PREG_SET_ORDER);
5454
if ($res === false) {
5555
throw new Exception('Invalid UTF-8 sequence.');
5656
}
5757

5858
$types = array_keys(self::Patterns);
5959
$offset = 0;
60-
foreach ($tokens as &$token) {
61-
$type = null;
62-
for ($i = 1; $i <= count($types); $i++) {
63-
if (isset($token[$i])) {
64-
$type = $types[$i - 1];
65-
if ($type === Token::Char) {
66-
$type = $token[0];
67-
}
68-
69-
break;
70-
}
71-
}
72-
73-
$token = new Token($token[0], $type);
74-
$offset += strlen($token->value);
60+
61+
$tokens = [];
62+
foreach ($matches as $match) {
63+
$type = $types[count($match) - 2];
64+
$tokens[] = new Token($match[0], $type === Token::Char ? $match[0] : $type);
65+
$offset += strlen($match[0]);
7566
}
7667

7768
$stream = new TokenStream($tokens);

0 commit comments

Comments
 (0)