Skip to content

Commit 8c8369d

Browse files
committed
fix node issue 51593
1 parent 7ed703c commit 8c8369d

File tree

4 files changed

+40
-18
lines changed

4 files changed

+40
-18
lines changed

include/ada/url_aggregator.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,12 @@ struct url_aggregator : url_base {
232232
}
233233

234234
/**
235-
* Return true on success.
235+
* Return true on success. The 'in_place' parameter indicates whether the
236+
* the string_view input is pointing in the buffer. When in_place is false,
237+
* we must nearly always update the buffer.
236238
* @see https://url.spec.whatwg.org/#concept-ipv4-parser
237239
*/
238-
[[nodiscard]] bool parse_ipv4(std::string_view input);
240+
[[nodiscard]] bool parse_ipv4(std::string_view input, bool in_place);
239241

240242
/**
241243
* Return true on success.

src/url.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
namespace ada {
1010

1111
bool url::parse_opaque_host(std::string_view input) {
12-
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
12+
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
1313
if (std::any_of(input.begin(), input.end(),
1414
ada::unicode::is_forbidden_host_code_point)) {
1515
return is_valid = false;
@@ -23,7 +23,7 @@ bool url::parse_opaque_host(std::string_view input) {
2323
}
2424

2525
bool url::parse_ipv4(std::string_view input) {
26-
ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]");
26+
ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]");
2727
if (input.back() == '.') {
2828
input.remove_suffix(1);
2929
}
@@ -98,7 +98,7 @@ bool url::parse_ipv4(std::string_view input) {
9898
}
9999

100100
bool url::parse_ipv6(std::string_view input) {
101-
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
101+
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
102102

103103
if (input.empty()) {
104104
return is_valid = false;
@@ -422,7 +422,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) {
422422
}
423423

424424
ada_really_inline bool url::parse_host(std::string_view input) {
425-
ada_log("parse_host ", input, "[", input.size(), " bytes]");
425+
ada_log("parse_host ", input, " [", input.size(), " bytes]");
426426
if (input.empty()) {
427427
return is_valid = false;
428428
} // technically unnecessary.
@@ -474,6 +474,7 @@ ada_really_inline bool url::parse_host(std::string_view input) {
474474
ada_log("parse_host to_ascii returns false");
475475
return is_valid = false;
476476
}
477+
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(), " bytes]");
477478

478479
if (std::any_of(host.value().begin(), host.value().end(),
479480
ada::unicode::is_forbidden_domain_code_point)) {
@@ -484,7 +485,7 @@ ada_really_inline bool url::parse_host(std::string_view input) {
484485
// If asciiDomain ends in a number, then return the result of IPv4 parsing
485486
// asciiDomain.
486487
if (checkers::is_ipv4(host.value())) {
487-
ada_log("parse_host got ipv4", *host);
488+
ada_log("parse_host got ipv4 ", *host);
488489
return parse_ipv4(host.value());
489490
}
490491

src/url_aggregator.cpp

+23-11
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ void url_aggregator::set_hash(const std::string_view input) {
411411

412412
bool url_aggregator::set_href(const std::string_view input) {
413413
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
414-
ada_log("url_aggregator::set_href ", input, "[", input.size(), " bytes]");
414+
ada_log("url_aggregator::set_href ", input, " [", input.size(), " bytes]");
415415
ada::result<url_aggregator> out = ada::parse<url_aggregator>(input);
416416
ada_log("url_aggregator::set_href, success :", out.has_value());
417417

@@ -425,7 +425,7 @@ bool url_aggregator::set_href(const std::string_view input) {
425425
}
426426

427427
ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
428-
ada_log("url_aggregator:parse_host ", input, "[", input.size(), " bytes]");
428+
ada_log("url_aggregator:parse_host \"", input, "\" [", input.size(), " bytes]");
429429
ADA_ASSERT_TRUE(validate());
430430
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
431431
if (input.empty()) {
@@ -475,7 +475,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
475475
update_base_hostname(input);
476476
if (checkers::is_ipv4(get_hostname())) {
477477
ada_log("parse_host fast path ipv4");
478-
return parse_ipv4(get_hostname());
478+
return parse_ipv4(get_hostname(), true);
479479
}
480480
ada_log("parse_host fast path ", get_hostname());
481481
return true;
@@ -491,6 +491,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
491491
ada_log("parse_host to_ascii returns false");
492492
return is_valid = false;
493493
}
494+
ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(), " bytes]");
494495

495496
if (std::any_of(host.value().begin(), host.value().end(),
496497
ada::unicode::is_forbidden_domain_code_point)) {
@@ -500,8 +501,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
500501
// If asciiDomain ends in a number, then return the result of IPv4 parsing
501502
// asciiDomain.
502503
if (checkers::is_ipv4(host.value())) {
503-
ada_log("parse_host got ipv4", *host);
504-
return parse_ipv4(host.value());
504+
ada_log("parse_host got ipv4 ", *host);
505+
return parse_ipv4(host.value(), false);
505506
}
506507

507508
update_base_hostname(host.value());
@@ -754,7 +755,7 @@ bool url_aggregator::set_hostname(const std::string_view input) {
754755
}
755756

756757
[[nodiscard]] std::string ada::url_aggregator::to_string() const {
757-
ada_log("url_aggregator::to_string buffer:", buffer, "[", buffer.size(),
758+
ada_log("url_aggregator::to_string buffer:", buffer, " [", buffer.size(),
758759
" bytes]");
759760
if (!is_valid) {
760761
return "null";
@@ -853,8 +854,8 @@ bool url_aggregator::set_hostname(const std::string_view input) {
853854
return checkers::verify_dns_length(get_hostname());
854855
}
855856

856-
bool url_aggregator::parse_ipv4(std::string_view input) {
857-
ada_log("parse_ipv4 ", input, "[", input.size(),
857+
bool url_aggregator::parse_ipv4(std::string_view input, bool in_place) {
858+
ada_log("parse_ipv4 ", input, " [", input.size(),
858859
" bytes], overlaps with buffer: ",
859860
helpers::overlaps(input, buffer) ? "yes" : "no");
860861
ADA_ASSERT_TRUE(validate());
@@ -878,20 +879,25 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
878879
} else {
879880
std::from_chars_result r;
880881
if (is_hex) {
882+
ada_log("parse_ipv4 trying to parse hex number");
881883
r = std::from_chars(input.data() + 2, input.data() + input.size(),
882884
segment_result, 16);
883885
} else if ((input.length() >= 2) && input[0] == '0' &&
884886
checkers::is_digit(input[1])) {
887+
ada_log("parse_ipv4 trying to parse octal number");
885888
r = std::from_chars(input.data() + 1, input.data() + input.size(),
886889
segment_result, 8);
887890
} else {
891+
ada_log("parse_ipv4 trying to parse decimal number");
888892
pure_decimal_count++;
889893
r = std::from_chars(input.data(), input.data() + input.size(),
890894
segment_result, 10);
891895
}
892896
if (r.ec != std::errc()) {
897+
ada_log("parse_ipv4 parsing failed");
893898
return is_valid = false;
894899
}
900+
ada_log("parse_ipv4 parsed ", segment_result);
895901
input.remove_prefix(r.ptr - input.data());
896902
}
897903
if (input.empty()) {
@@ -916,17 +922,20 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
916922
}
917923
}
918924
if ((digit_count != 4) || (!input.empty())) {
925+
ada_log("parse_ipv4 found invalid (more than 4 numbers or empty) ");
919926
return is_valid = false;
920927
}
921928
final:
922929
ada_log("url_aggregator::parse_ipv4 completed ", get_href(),
923930
" host: ", get_host());
924931

925932
// We could also check r.ptr to see where the parsing ended.
926-
if (pure_decimal_count == 4 && !trailing_dot) {
933+
if (in_place && pure_decimal_count == 4 && !trailing_dot) {
934+
ada_log("url_aggregator::parse_ipv4 completed and was already correct in the buffer");
927935
// The original input was already all decimal and we validated it. So we
928936
// don't need to do anything.
929937
} else {
938+
ada_log("url_aggregator::parse_ipv4 completed and we need to update it");
930939
// Optimization opportunity: Get rid of unnecessary string return in ipv4
931940
// serializer.
932941
// TODO: This is likely a bug because it goes back update_base_hostname, not
@@ -940,8 +949,11 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
940949
}
941950

942951
bool url_aggregator::parse_ipv6(std::string_view input) {
952+
// TODO: Implement in_place optimization: we know that input points
953+
// in the buffer, so we can just check whether the buffer is already
954+
// well formatted.
943955
// TODO: Find a way to merge parse_ipv6 with url.cpp implementation.
944-
ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]");
956+
ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
945957
ADA_ASSERT_TRUE(validate());
946958
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
947959
if (input.empty()) {
@@ -1175,7 +1187,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) {
11751187
}
11761188

11771189
bool url_aggregator::parse_opaque_host(std::string_view input) {
1178-
ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]");
1190+
ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
11791191
ADA_ASSERT_TRUE(validate());
11801192
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
11811193
if (std::any_of(input.begin(), input.end(),

tests/basic_tests.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,10 @@ TYPED_TEST(basic_tests, nodejs_51514) {
402402
auto out = ada::parse<TypeParam>("http://1.1.1.256");
403403
ASSERT_FALSE(out);
404404
}
405+
// https://github.com/nodejs/node/issues/51593
406+
TYPED_TEST(basic_tests, nodejs_51593) {
407+
auto out = ada::parse<TypeParam>("http://\u200b123.123.123.123");
408+
ASSERT_TRUE(out);
409+
ASSERT_EQ(out->get_href(), "http://123.123.123.123/");
410+
SUCCEED();
411+
}

0 commit comments

Comments
 (0)