Skip to content

Improve performance of urldecode() and rawurldecode() #18378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions UPGRADING.INTERNALS
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ PHP 8.5 INTERNALS UPGRADE NOTES
is still valid. This is useful when a GC cycle is collected and the
database object can be destroyed prior to destroying the statement.

- ext/standard
. Added php_url_decode_ex() and php_raw_url_decode_ex() which, unlike their
non-ex counterparts, write the decoded data to a separate destination
buffer instead of decoding in-place.

========================
4. OpCode changes
========================
Expand Down
69 changes: 41 additions & 28 deletions ext/standard/url.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,21 +411,24 @@ PHP_FUNCTION(parse_url)
}
/* }}} */

/* Converts a single ASCII hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its
 * numeric value 0-15 without branching.
 *
 * In ASCII the digits '0'-'9' have bit 6 clear and carry their value in the
 * low nibble, while the letters of both cases have bit 6 set and a low nibble
 * of 1-6; adding 9 for letters therefore yields 10-15.
 *
 * The caller must pass a valid hex digit; this is only checked by assertion.
 *
 * Technique from:
 * https://stackoverflow.com/questions/34365746/whats-the-fastest-way-to-convert-hex-to-integer-in-c
 */
static unsigned int php_htoi_single(unsigned char x)
{
	/* 1 for 'A'-'F' / 'a'-'f', 0 for '0'-'9'. */
	const unsigned int is_letter = x >> 6;
	const unsigned int low_nibble = x & 0xf;

	ZEND_ASSERT((x >= '0' && x <= '9') || (x >= 'A' && x <= 'F') || (x >= 'a' && x <= 'f'));

	return is_letter * 9 + low_nibble;
}

/* {{{ php_htoi
 * Combines the two hex digits at s[0] and s[1] into one integer:
 * s[0] supplies the high nibble (multiplied by 16), s[1] the low nibble.
 * Nothing in this function rejects non-hex input; the callers visible in
 * this file test both characters with isxdigit() before calling it.
 */
static int php_htoi(char *s)
static int php_htoi(const char *s)
{
int value;
int c;
unsigned char c;

/* High nibble: first hex digit, scaled by 16. */
c = ((unsigned char *)s)[0];
if (isupper(c))
c = tolower(c);
value = (c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10) * 16;
value = php_htoi_single(c) * 16;

/* Low nibble: second hex digit, added as-is. */
c = ((unsigned char *)s)[1];
if (isupper(c))
c = tolower(c);
value += c >= '0' && c <= '9' ? c - '0' : c - 'a' + 10;
value += php_htoi_single(c);

return (value);
}
Expand Down Expand Up @@ -571,36 +574,41 @@ PHP_FUNCTION(urldecode)
Z_PARAM_STR(in_str)
ZEND_PARSE_PARAMETERS_END();

out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
ZSTR_LEN(out_str) = php_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
ZSTR_LEN(out_str) = php_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));

RETURN_NEW_STR(out_str);
}
/* }}} */

/* {{{ php_url_decode
 * Decodes URL-encoded input: '+' becomes a space, "%XX" (two hex digits)
 * becomes the byte it encodes, and every other byte is copied unchanged.
 * Each loop iteration writes exactly one output byte and consumes one or
 * three input bytes, so the output is never longer than the input; a
 * terminating NUL is written after it.
 * Returns the number of bytes written, not counting the NUL.
 */
PHPAPI size_t php_url_decode(char *str, size_t len)
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len)
{
char *dest = str;
char *data = str;
char *dest_start = dest;
const char *data = src;

/* The counter is decremented before the body runs, so inside the loop it
 * holds the number of bytes remaining AFTER the current one. */
while (len--) {
while (src_len--) {
if (*data == '+') {
*dest = ' ';
}
/* Treat '%' as an escape only when two more bytes remain and both are hex
 * digits; otherwise it falls through and is copied literally.
 * NOTE(review): *(data + 1) is a plain char, so on platforms where char is
 * signed a byte >= 0x80 reaches isxdigit() as a negative int - consider
 * converting through unsigned char first. */
else if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
else if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
&& isxdigit((int) *(data + 2))) {
*dest = (char) php_htoi(data + 1);
/* Skip the two hex digits that were just decoded. */
data += 2;
len -= 2;
src_len -= 2;
} else {
*dest = *data;
}
data++;
dest++;
}
*dest = '\0';
return dest - str;
return dest - dest_start;
}

/* {{{ php_url_decode
 * In-place convenience wrapper: decodes str over itself by handing the same
 * buffer to php_url_decode_ex() as both destination and source, and returns
 * the decoded length reported by that call.
 */
PHPAPI size_t php_url_decode(char *str, size_t len)
{
	const size_t decoded_len = php_url_decode_ex(str, str, len);

	return decoded_len;
}
/* }}} */

Expand Down Expand Up @@ -633,33 +641,38 @@ PHP_FUNCTION(rawurldecode)
Z_PARAM_STR(in_str)
ZEND_PARSE_PARAMETERS_END();

out_str = zend_string_init(ZSTR_VAL(in_str), ZSTR_LEN(in_str), 0);
ZSTR_LEN(out_str) = php_raw_url_decode(ZSTR_VAL(out_str), ZSTR_LEN(out_str));
out_str = zend_string_alloc(ZSTR_LEN(in_str), false);
ZSTR_LEN(out_str) = php_raw_url_decode_ex(ZSTR_VAL(out_str), ZSTR_VAL(in_str), ZSTR_LEN(in_str));

RETURN_NEW_STR(out_str);
}
/* }}} */

/* {{{ php_raw_url_decode
 * Decodes percent-encoded input: "%XX" (two hex digits) becomes the byte it
 * encodes and every other byte, including '+', is copied unchanged.
 * Each loop iteration writes exactly one output byte and consumes one or
 * three input bytes, so the output is never longer than the input; a
 * terminating NUL is written after it.
 * Returns the number of bytes written, not counting the NUL.
 */
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len)
{
char *dest = str;
char *data = str;
char *dest_start = dest;
const char *data = src;

/* The counter is decremented before the body runs, so inside the loop it
 * holds the number of bytes remaining AFTER the current one; ">= 2" thus
 * guarantees both look-ahead reads stay inside the input.
 * NOTE(review): *(data + 1) is a plain char, so on platforms where char is
 * signed a byte >= 0x80 reaches isxdigit() as a negative int - consider
 * converting through unsigned char first. */
while (len--) {
if (*data == '%' && len >= 2 && isxdigit((int) *(data + 1))
while (src_len--) {
if (*data == '%' && src_len >= 2 && isxdigit((int) *(data + 1))
&& isxdigit((int) *(data + 2))) {
*dest = (char) php_htoi(data + 1);
/* Skip the two hex digits that were just decoded. */
data += 2;
len -= 2;
src_len -= 2;
} else {
*dest = *data;
}
data++;
dest++;
}
*dest = '\0';
return dest - str;
return dest - dest_start;
}

/* {{{ php_raw_url_decode
 * In-place convenience wrapper: decodes str over itself by handing the same
 * buffer to php_raw_url_decode_ex() as both destination and source, and
 * returns the decoded length reported by that call.
 */
PHPAPI size_t php_raw_url_decode(char *str, size_t len)
{
	const size_t decoded_len = php_raw_url_decode_ex(str, str, len);

	return decoded_len;
}
/* }}} */

Expand Down
2 changes: 2 additions & 0 deletions ext/standard/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ PHPAPI php_url *php_url_parse(char const *str);
PHPAPI php_url *php_url_parse_ex(char const *str, size_t length);
PHPAPI php_url *php_url_parse_ex2(char const *str, size_t length, bool *has_port);
PHPAPI size_t php_url_decode(char *str, size_t len); /* return value: length of decoded string */
PHPAPI size_t php_url_decode_ex(char *dest, const char *src, size_t src_len); /* return value: length of decoded string written to dest */
PHPAPI size_t php_raw_url_decode(char *str, size_t len); /* return value: length of decoded string */
PHPAPI size_t php_raw_url_decode_ex(char *dest, const char *src, size_t src_len); /* return value: length of decoded string written to dest */
PHPAPI zend_string *php_url_encode(char const *s, size_t len);
PHPAPI zend_string *php_raw_url_encode(char const *s, size_t len);

Expand Down
Loading