|
23 | 23 | import java.util.ArrayList; |
24 | 24 | import java.util.Collections; |
25 | 25 | import java.util.HashMap; |
| 26 | +import java.util.HashSet; |
26 | 27 | import java.util.List; |
27 | 28 | import java.util.Map; |
| 29 | +import java.util.Set; |
28 | 30 | import java.util.UUID; |
29 | 31 | import java.util.regex.Matcher; |
30 | 32 | import java.util.regex.Pattern; |
@@ -137,6 +139,8 @@ public String getReason() { |
137 | 139 | Pattern.compile( |
138 | 140 | "(?i)window\\.(?:open|navigate)\\s*\\(\\s*['\"](" + SITE_PATT + ")['\"]"); |
139 | 141 |
|
/**
 * Substrings that {@code isRedirectPresent} searches for, ignoring case, before it does any
 * comment processing: a value that contains none of them, and does not contain {@code
 * SITE_HOST} either, is rejected straight away.
 */
private static final List<String> JS_PRE_CHECKS = List.of("window.", "location.", "location=");
| 143 | + |
140 | 144 | /** The various (prioritized) payloads to try */ |
141 | 145 | private enum RedirectPayloads { |
142 | 146 | PLAIN_SITE(REDIRECT_SITE, false), |
@@ -476,11 +480,177 @@ private static RedirectType isRedirected(String payload, HttpMessage msg) { |
476 | 480 | } |
477 | 481 |
|
478 | 482 | private static boolean isRedirectPresent(Pattern pattern, String value) { |
479 | | - Matcher matcher = pattern.matcher(value); |
| 483 | + // Ensure the value has something we're interested in before dealing with comments |
| 484 | + if (!StringUtils.containsIgnoreCase(value, SITE_HOST) |
| 485 | + && JS_PRE_CHECKS.stream() |
| 486 | + .noneMatch(chk -> StringUtils.containsIgnoreCase(value, chk))) { |
| 487 | + return false; |
| 488 | + } |
| 489 | + Set<String> extractedComments = extractJsComments(value); |
| 490 | + String valueWithoutComments = value; |
| 491 | + for (String comment : extractedComments) { |
| 492 | + valueWithoutComments = valueWithoutComments.replace(comment, ""); |
| 493 | + } |
| 494 | + |
| 495 | + Matcher matcher = pattern.matcher(valueWithoutComments); |
| 496 | + |
480 | 497 | return matcher.find() |
481 | 498 | && StringUtils.startsWithIgnoreCase(matcher.group(1), HttpHeader.HTTP); |
482 | 499 | } |
483 | 500 |
|
| 501 | + private static Set<String> extractJsComments(String js) { |
| 502 | + // Some of the escapes in the comments below are double because of Java requirements |
| 503 | + Set<String> comments = new HashSet<>(); |
| 504 | + |
| 505 | + final int n = js.length(); |
| 506 | + boolean inSingle = false; // '...' |
| 507 | + boolean inDouble = false; // "..." |
| 508 | + int i = 0; |
| 509 | + |
| 510 | + while (i < n) { |
| 511 | + char c = js.charAt(i); |
| 512 | + |
| 513 | + // Inside a quoted string? Only look for the matching quote, consuming full escapes. |
| 514 | + if (inSingle || inDouble) { |
| 515 | + if (c == '\\') { |
| 516 | + i = consumeJsEscape(js, i); // Returns index of the last char of the escape |
| 517 | + } else if (inSingle && c == '\'') { |
| 518 | + inSingle = false; |
| 519 | + } else if (inDouble && c == '"') { |
| 520 | + inDouble = false; |
| 521 | + } |
| 522 | + i++; |
| 523 | + continue; |
| 524 | + } |
| 525 | + |
| 526 | + // Not inside a string: maybe we’re entering one? |
| 527 | + if (c == '\'') { |
| 528 | + inSingle = true; |
| 529 | + i++; |
| 530 | + continue; |
| 531 | + } |
| 532 | + if (c == '"') { |
| 533 | + inDouble = true; |
| 534 | + i++; |
| 535 | + continue; |
| 536 | + } |
| 537 | + |
| 538 | + // Not in a string: check for comments |
| 539 | + if (c == '/' && i + 1 < n) { |
| 540 | + char d = js.charAt(i + 1); |
| 541 | + |
| 542 | + // Single-line //... |
| 543 | + if (d == '/') { |
| 544 | + int end = i + 2; |
| 545 | + while (end < n && !isJsLineTerminator(js.charAt(end))) end++; |
| 546 | + comments.add(js.substring(i, end)); |
| 547 | + i = end; // position at line break (or end) |
| 548 | + continue; |
| 549 | + } |
| 550 | + |
| 551 | + // Multi-line /* ... */ |
| 552 | + if (d == '*') { |
| 553 | + int end = js.indexOf("*/", i + 2); |
| 554 | + if (end == -1) { |
| 555 | + // Unterminated: consume to end |
| 556 | + comments.add(js.substring(i)); |
| 557 | + i = n; |
| 558 | + } else { |
| 559 | + comments.add(js.substring(i, end + 2)); |
| 560 | + i = end + 2; |
| 561 | + } |
| 562 | + continue; |
| 563 | + } |
| 564 | + } |
| 565 | + |
| 566 | + // Otherwise, just move on. |
| 567 | + i++; |
| 568 | + } |
| 569 | + |
| 570 | + return comments; |
| 571 | + } |
| 572 | + |
| 573 | + /** |
| 574 | + * Consumes a full JS escape sequence starting at the backslash. Returns the index of the last |
| 575 | + * character that belongs to the escape. Handles: \n, \r, \t, \b, \f, \v, \0, \', \", \\, |
| 576 | + * line-continuations, \xHH, \uFFFF, \\u{...} |
| 577 | + */ |
| 578 | + private static int consumeJsEscape(String s, int backslash) { |
| 579 | + int n = s.length(); |
| 580 | + int i = backslash; |
| 581 | + if (i + 1 >= n) { |
| 582 | + return i; // Nothing to consume after '\' |
| 583 | + } |
| 584 | + |
| 585 | + char e = s.charAt(i + 1); |
| 586 | + |
| 587 | + // Line continuation: backslash followed by a line terminator |
| 588 | + if (isJsLineTerminator(e)) { |
| 589 | + // Consume \r\n as a unit if present |
| 590 | + if (e == '\r' && i + 2 < n && s.charAt(i + 2) == '\n') { |
| 591 | + return i + 2; |
| 592 | + } |
| 593 | + return i + 1; |
| 594 | + } |
| 595 | + |
| 596 | + // \xHH (2 hex digits) |
| 597 | + if (e == 'x' || e == 'X') { |
| 598 | + int j = i + 2; |
| 599 | + int consumed = 0; |
| 600 | + while (j < n && consumed < 2 && isHexDigit(s.charAt(j))) { |
| 601 | + j++; |
| 602 | + consumed++; |
| 603 | + } |
| 604 | + // Even if malformed, we stop at the last hex digit we found |
| 605 | + return j - 1; |
| 606 | + } |
| 607 | + |
| 608 | + // \uFFFF or \\u{...} |
| 609 | + if (e == 'u' || e == 'U') { |
| 610 | + int j = i + 2; |
| 611 | + if (j < n && s.charAt(j) == '{') { |
| 612 | + // \\u{hex+} |
| 613 | + j++; |
| 614 | + while (j < n && isHexDigit(s.charAt(j))) { |
| 615 | + j++; |
| 616 | + } |
| 617 | + if (j < n && s.charAt(j) == '}') j++; // Close if present |
| 618 | + return j - 1; // End of } or last hex if malformed |
| 619 | + } else { |
| 620 | + // \\uHHHH (exactly 4 hex if well-formed) |
| 621 | + int consumed = 0; |
| 622 | + while (j < n && consumed < 4 && isHexDigit(s.charAt(j))) { |
| 623 | + j++; |
| 624 | + consumed++; |
| 625 | + } |
| 626 | + return j - 1; |
| 627 | + } |
| 628 | + } |
| 629 | + |
| 630 | + // Octal escapes (legacy). Consume up to 3 octal digits if present. |
| 631 | + if (e >= '0' && e <= '7') { |
| 632 | + int j = i + 1; |
| 633 | + int consumed = 0; |
| 634 | + while (j < n && consumed < 3 && s.charAt(j) >= '0' && s.charAt(j) <= '7') { |
| 635 | + j++; |
| 636 | + consumed++; |
| 637 | + } |
| 638 | + return j - 1; |
| 639 | + } |
| 640 | + |
| 641 | + // Simple one-char escapes: \n \r \t \b \f \v \0 \' \" \\ |
| 642 | + return i + 1; |
| 643 | + } |
| 644 | + |
| 645 | + private static boolean isHexDigit(char c) { |
| 646 | + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); |
| 647 | + } |
| 648 | + |
| 649 | + private static boolean isJsLineTerminator(char c) { |
| 650 | + // JS line terminators: LF, CR, LS, PS |
| 651 | + return c == '\n' || c == '\r' || c == '\u2028' || c == '\u2029'; |
| 652 | + } |
| 653 | + |
484 | 654 | @Override |
485 | 655 | public int getRisk() { |
486 | 656 | return Alert.RISK_HIGH; |
|
0 commit comments