@@ -411,7 +411,7 @@ void url_aggregator::set_hash(const std::string_view input) {
411
411
412
412
bool url_aggregator::set_href (const std::string_view input) {
413
413
ADA_ASSERT_TRUE (!helpers::overlaps (input, buffer));
414
- ada_log (" url_aggregator::set_href " , input, " [" , input.size (), " bytes]" );
414
+ ada_log (" url_aggregator::set_href " , input, " [" , input.size (), " bytes]" );
415
415
ada::result<url_aggregator> out = ada::parse<url_aggregator>(input);
416
416
ada_log (" url_aggregator::set_href, success :" , out.has_value ());
417
417
@@ -425,7 +425,7 @@ bool url_aggregator::set_href(const std::string_view input) {
425
425
}
426
426
427
427
ada_really_inline bool url_aggregator::parse_host (std::string_view input) {
428
- ada_log (" url_aggregator:parse_host " , input, " [" , input.size (), " bytes]" );
428
+ ada_log (" url_aggregator:parse_host \" " , input, " \" [" , input.size (), " bytes]" );
429
429
ADA_ASSERT_TRUE (validate ());
430
430
ADA_ASSERT_TRUE (!helpers::overlaps (input, buffer));
431
431
if (input.empty ()) {
@@ -475,7 +475,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
475
475
update_base_hostname (input);
476
476
if (checkers::is_ipv4 (get_hostname ())) {
477
477
ada_log (" parse_host fast path ipv4" );
478
- return parse_ipv4 (get_hostname ());
478
+ return parse_ipv4 (get_hostname (), true );
479
479
}
480
480
ada_log (" parse_host fast path " , get_hostname ());
481
481
return true ;
@@ -491,6 +491,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
491
491
ada_log (" parse_host to_ascii returns false" );
492
492
return is_valid = false ;
493
493
}
494
+ ada_log (" parse_host to_ascii succeeded " , *host, " [" , host->size (), " bytes]" );
494
495
495
496
if (std::any_of (host.value ().begin (), host.value ().end (),
496
497
ada::unicode::is_forbidden_domain_code_point)) {
@@ -500,8 +501,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
500
501
// If asciiDomain ends in a number, then return the result of IPv4 parsing
501
502
// asciiDomain.
502
503
if (checkers::is_ipv4 (host.value ())) {
503
- ada_log (" parse_host got ipv4" , *host);
504
- return parse_ipv4 (host.value ());
504
+ ada_log (" parse_host got ipv4 " , *host);
505
+ return parse_ipv4 (host.value (), false );
505
506
}
506
507
507
508
update_base_hostname (host.value ());
@@ -754,7 +755,7 @@ bool url_aggregator::set_hostname(const std::string_view input) {
754
755
}
755
756
756
757
[[nodiscard]] std::string ada::url_aggregator::to_string () const {
757
- ada_log (" url_aggregator::to_string buffer:" , buffer, " [" , buffer.size (),
758
+ ada_log (" url_aggregator::to_string buffer:" , buffer, " [" , buffer.size (),
758
759
" bytes]" );
759
760
if (!is_valid) {
760
761
return " null" ;
@@ -853,8 +854,8 @@ bool url_aggregator::set_hostname(const std::string_view input) {
853
854
return checkers::verify_dns_length (get_hostname ());
854
855
}
855
856
856
- bool url_aggregator::parse_ipv4 (std::string_view input) {
857
- ada_log (" parse_ipv4 " , input, " [" , input.size (),
857
+ bool url_aggregator::parse_ipv4 (std::string_view input, bool in_place ) {
858
+ ada_log (" parse_ipv4 " , input, " [" , input.size (),
858
859
" bytes], overlaps with buffer: " ,
859
860
helpers::overlaps (input, buffer) ? " yes" : " no" );
860
861
ADA_ASSERT_TRUE (validate ());
@@ -878,20 +879,25 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
878
879
} else {
879
880
std::from_chars_result r;
880
881
if (is_hex) {
882
+ ada_log (" parse_ipv4 trying to parse hex number" );
881
883
r = std::from_chars (input.data () + 2 , input.data () + input.size (),
882
884
segment_result, 16 );
883
885
} else if ((input.length () >= 2 ) && input[0 ] == ' 0' &&
884
886
checkers::is_digit (input[1 ])) {
887
+ ada_log (" parse_ipv4 trying to parse octal number" );
885
888
r = std::from_chars (input.data () + 1 , input.data () + input.size (),
886
889
segment_result, 8 );
887
890
} else {
891
+ ada_log (" parse_ipv4 trying to parse decimal number" );
888
892
pure_decimal_count++;
889
893
r = std::from_chars (input.data (), input.data () + input.size (),
890
894
segment_result, 10 );
891
895
}
892
896
if (r.ec != std::errc ()) {
897
+ ada_log (" parse_ipv4 parsing failed" );
893
898
return is_valid = false ;
894
899
}
900
+ ada_log (" parse_ipv4 parsed " , segment_result);
895
901
input.remove_prefix (r.ptr - input.data ());
896
902
}
897
903
if (input.empty ()) {
@@ -916,17 +922,20 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
916
922
}
917
923
}
918
924
if ((digit_count != 4 ) || (!input.empty ())) {
925
+ ada_log (" parse_ipv4 found invalid (more than 4 numbers or empty) " );
919
926
return is_valid = false ;
920
927
}
921
928
final :
922
929
ada_log (" url_aggregator::parse_ipv4 completed " , get_href (),
923
930
" host: " , get_host ());
924
931
925
932
// We could also check r.ptr to see where the parsing ended.
926
- if (pure_decimal_count == 4 && !trailing_dot) {
933
+ if (in_place && pure_decimal_count == 4 && !trailing_dot) {
934
+ ada_log (" url_aggregator::parse_ipv4 completed and was already correct in the buffer" );
927
935
// The original input was already all decimal and we validated it. So we
928
936
// don't need to do anything.
929
937
} else {
938
+ ada_log (" url_aggregator::parse_ipv4 completed and we need to update it" );
930
939
// Optimization opportunity: Get rid of unnecessary string return in ipv4
931
940
// serializer.
932
941
// TODO: This is likely a bug because it goes back update_base_hostname, not
@@ -940,8 +949,11 @@ bool url_aggregator::parse_ipv4(std::string_view input) {
940
949
}
941
950
942
951
bool url_aggregator::parse_ipv6 (std::string_view input) {
952
+ // TODO: Implement in_place optimization: we know that input points
953
+ // in the buffer, so we can just check whether the buffer is already
954
+ // well formatted.
943
955
// TODO: Find a way to merge parse_ipv6 with url.cpp implementation.
944
- ada_log (" parse_ipv6 " , input, " [" , input.size (), " bytes]" );
956
+ ada_log (" parse_ipv6 " , input, " [" , input.size (), " bytes]" );
945
957
ADA_ASSERT_TRUE (validate ());
946
958
ADA_ASSERT_TRUE (!helpers::overlaps (input, buffer));
947
959
if (input.empty ()) {
@@ -1175,7 +1187,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) {
1175
1187
}
1176
1188
1177
1189
bool url_aggregator::parse_opaque_host (std::string_view input) {
1178
- ada_log (" parse_opaque_host " , input, " [" , input.size (), " bytes]" );
1190
+ ada_log (" parse_opaque_host " , input, " [" , input.size (), " bytes]" );
1179
1191
ADA_ASSERT_TRUE (validate ());
1180
1192
ADA_ASSERT_TRUE (!helpers::overlaps (input, buffer));
1181
1193
if (std::any_of (input.begin (), input.end (),
0 commit comments