@@ -729,12 +729,16 @@ private function tagNameState() {
729
729
$ this ->state = 'data ' ;
730
730
731
731
} elseif ('A ' <= $ char && $ char <= 'Z ' ) {
732
- // possible optimization: glob further
733
732
/* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z
734
733
Append the lowercase version of the current input
735
734
character (add 0x0020 to the character's code point) to
736
735
the current tag token's tag name. Stay in the tag name state. */
737
- $ this ->token ['name ' ] .= strtolower ($ char );
736
+ $ len = strspn ($ this ->data , self ::UPPER_ALPHA , $ this ->char + 1 );
737
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
738
+
739
+ $ this ->char += $ len ;
740
+
741
+ $ this ->token ['name ' ] .= strtolower ($ this ->c . $ char );
738
742
$ this ->state = 'tagName ' ;
739
743
740
744
} elseif ($ char === false ) {
@@ -746,11 +750,15 @@ private function tagNameState() {
746
750
$ this ->EOF ();
747
751
748
752
} else {
749
- // possible optimization: glob further
750
753
/* Anything else
751
754
Append the current input character to the current tag token's tag name.
752
755
Stay in the tag name state. */
753
- $ this ->token ['name ' ] .= $ char ;
756
+ $ len = strspn ($ this ->data , self ::LOWER_ALPHA , $ this ->char + 1 );
757
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
758
+
759
+ $ this ->char += $ len ;
760
+
761
+ $ this ->token ['name ' ] .= $ this ->c . $ char ;
754
762
$ this ->state = 'tagName ' ;
755
763
}
756
764
}
@@ -1051,8 +1059,13 @@ private function attributeValueDoubleQuotedState() {
1051
1059
/* Anything else
1052
1060
Append the current input character to the current attribute's value.
1053
1061
Stay in the attribute value (double-quoted) state. */
1062
+ $ len = strcspn ($ this ->data , '"& ' , $ this ->char + 1 );
1063
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
1064
+
1065
+ $ this ->char += $ len ;
1066
+
1054
1067
$ last = count ($ this ->token ['attr ' ]) - 1 ;
1055
- $ this ->token ['attr ' ][$ last ]['value ' ] .= $ char ;
1068
+ $ this ->token ['attr ' ][$ last ]['value ' ] .= $ this -> c . $ char ;
1056
1069
1057
1070
$ this ->state = 'attributeValueDoubleQuoted ' ;
1058
1071
}
@@ -1084,8 +1097,13 @@ private function attributeValueSingleQuotedState() {
1084
1097
/* Anything else
1085
1098
Append the current input character to the current attribute's value.
1086
1099
Stay in the attribute value (single-quoted) state. */
1100
+ $ len = strcspn ($ this ->data , "'& " , $ this ->char + 1 );
1101
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
1102
+
1103
+ $ this ->char += $ len ;
1104
+
1087
1105
$ last = count ($ this ->token ['attr ' ]) - 1 ;
1088
- $ this ->token ['attr ' ][$ last ]['value ' ] .= $ char ;
1106
+ $ this ->token ['attr ' ][$ last ]['value ' ] .= $ this -> c . $ char ;
1089
1107
1090
1108
$ this ->state = 'attributeValueSingleQuoted ' ;
1091
1109
}
@@ -1131,8 +1149,13 @@ private function attributeValueUnquotedState() {
1131
1149
/* Anything else
1132
1150
Append the current input character to the current attribute's value.
1133
1151
Stay in the attribute value (unquoted) state. */
1152
+ $ len = strcspn ($ this ->data , "\t\n\x0c &> \"'= " , $ this ->char + 1 );
1153
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
1154
+
1155
+ $ this ->char += $ len ;
1156
+
1134
1157
$ last = count ($ this ->token ['attr ' ]) - 1 ;
1135
- $ this ->token ['attr ' ][$ last ]['value ' ] .= $ char ;
1158
+ $ this ->token ['attr ' ][$ last ]['value ' ] .= $ this -> c . $ char ;
1136
1159
1137
1160
$ this ->state = 'attributeValueUnquoted ' ;
1138
1161
}
@@ -1368,7 +1391,12 @@ private function commentState() {
1368
1391
/* Anything else
1369
1392
Append the input character to the comment token's data. Stay in
1370
1393
the comment state. */
1371
- $ this ->token ['data ' ] .= $ char ;
1394
+ $ len = strcspn ($ this ->data , '- ' , $ this ->char + 1 );
1395
+ $ char = substr ($ this ->data , $ this ->char + 1 , $ len );
1396
+
1397
+ $ this ->char += $ len ;
1398
+
1399
+ $ this ->token ['data ' ] .= $ this ->c . $ char ;
1372
1400
}
1373
1401
}
1374
1402
0 commit comments