Skip to content

Commit b891dc5

Browse files
committed
PHP: grab multiple characters at once in more tokenizer states (tag name, the three attribute value states, and comment). This gives a fair performance boost on the spec (~12s), primarily by cutting out the repeated per-character method calls to each of these states.
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401287
1 parent 1fb0f97 commit b891dc5

File tree

1 file changed

+36
-8
lines changed

1 file changed

+36
-8
lines changed

library/HTML5/Tokenizer.php

+36-8
Original file line numberDiff line numberDiff line change
@@ -729,12 +729,16 @@ private function tagNameState() {
729729
$this->state = 'data';
730730

731731
} elseif('A' <= $char && $char <= 'Z') {
732-
// possible optimization: glob further
733732
/* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
734733
Append the lowercase version of the current input
735734
character (add 0x0020 to the character's code point) to
736735
the current tag token's tag name. Stay in the tag name state. */
737-
$this->token['name'] .= strtolower($char);
736+
$len = strspn($this->data, self::UPPER_ALPHA, $this->char + 1);
737+
$char = substr($this->data, $this->char + 1, $len);
738+
739+
$this->char += $len;
740+
741+
$this->token['name'] .= strtolower($this->c . $char);
738742
$this->state = 'tagName';
739743

740744
} elseif($char === false) {
@@ -746,11 +750,15 @@ private function tagNameState() {
746750
$this->EOF();
747751

748752
} else {
749-
// possible optimization: glob further
750753
/* Anything else
751754
Append the current input character to the current tag token's tag name.
752755
Stay in the tag name state. */
753-
$this->token['name'] .= $char;
756+
$len = strspn($this->data, self::LOWER_ALPHA, $this->char + 1);
757+
$char = substr($this->data, $this->char + 1, $len);
758+
759+
$this->char += $len;
760+
761+
$this->token['name'] .= $this->c . $char;
754762
$this->state = 'tagName';
755763
}
756764
}
@@ -1051,8 +1059,13 @@ private function attributeValueDoubleQuotedState() {
10511059
/* Anything else
10521060
Append the current input character to the current attribute's value.
10531061
Stay in the attribute value (double-quoted) state. */
1062+
$len = strcspn($this->data, '"&', $this->char + 1);
1063+
$char = substr($this->data, $this->char + 1, $len);
1064+
1065+
$this->char += $len;
1066+
10541067
$last = count($this->token['attr']) - 1;
1055-
$this->token['attr'][$last]['value'] .= $char;
1068+
$this->token['attr'][$last]['value'] .= $this->c . $char;
10561069

10571070
$this->state = 'attributeValueDoubleQuoted';
10581071
}
@@ -1084,8 +1097,13 @@ private function attributeValueSingleQuotedState() {
10841097
/* Anything else
10851098
Append the current input character to the current attribute's value.
10861099
Stay in the attribute value (single-quoted) state. */
1100+
$len = strcspn($this->data, "'&", $this->char + 1);
1101+
$char = substr($this->data, $this->char + 1, $len);
1102+
1103+
$this->char += $len;
1104+
10871105
$last = count($this->token['attr']) - 1;
1088-
$this->token['attr'][$last]['value'] .= $char;
1106+
$this->token['attr'][$last]['value'] .= $this->c . $char;
10891107

10901108
$this->state = 'attributeValueSingleQuoted';
10911109
}
@@ -1131,8 +1149,13 @@ private function attributeValueUnquotedState() {
11311149
/* Anything else
11321150
Append the current input character to the current attribute's value.
11331151
Stay in the attribute value (unquoted) state. */
1152+
$len = strcspn($this->data, "\t\n\x0c &>\"'=", $this->char + 1);
1153+
$char = substr($this->data, $this->char + 1, $len);
1154+
1155+
$this->char += $len;
1156+
11341157
$last = count($this->token['attr']) - 1;
1135-
$this->token['attr'][$last]['value'] .= $char;
1158+
$this->token['attr'][$last]['value'] .= $this->c . $char;
11361159

11371160
$this->state = 'attributeValueUnquoted';
11381161
}
@@ -1368,7 +1391,12 @@ private function commentState() {
13681391
/* Anything else
13691392
Append the input character to the comment token's data. Stay in
13701393
the comment state. */
1371-
$this->token['data'] .= $char;
1394+
$len = strcspn($this->data, '-', $this->char + 1);
1395+
$char = substr($this->data, $this->char + 1, $len);
1396+
1397+
$this->char += $len;
1398+
1399+
$this->token['data'] .= $this->c . $char;
13721400
}
13731401
}
13741402

0 commit comments

Comments
 (0)