|
22 | 22 |
|
23 | 23 | import javax.annotation.Nonnull;
|
24 | 24 | import javax.annotation.Nullable;
|
| 25 | +import java.util.regex.Matcher; |
25 | 26 | import java.util.regex.Pattern;
|
26 | 27 |
|
27 | 28 | /**
|
@@ -92,6 +93,8 @@ public class MarkdownSanitizer
|
92 | 93 | tokens.put(STRIKE, "~~");
|
93 | 94 | }
|
94 | 95 |
|
| 96 | + private Matcher urlMatcher; |
| 97 | + private int urlReplacementIteration; |
95 | 98 | private int ignored;
|
96 | 99 | private SanitizationStrategy strategy;
|
97 | 100 |
|
@@ -553,6 +556,11 @@ private boolean isIgnored(int nextRegion)
|
553 | 556 | public String compute(@Nonnull String sequence)
|
554 | 557 | {
|
555 | 558 | Checks.notNull(sequence, "Input");
|
| 559 | + |
| 560 | + setUrlMatcher(sequence); |
| 561 | + String[][] urlSubBox = getURLSubBox(sequence); |
| 562 | + sequence = replaceURLs(sequence, urlSubBox, true); |
| 563 | + |
556 | 564 | StringBuilder builder = new StringBuilder();
|
557 | 565 | String end = handleQuote(sequence);
|
558 | 566 | if (end != null) return end;
|
@@ -590,7 +598,10 @@ public String compute(@Nonnull String sequence)
|
590 | 598 | applyStrategy(nextRegion, handleRegion(i + delta, endRegion, sequence, nextRegion), builder);
|
591 | 599 | i = endRegion + delta;
|
592 | 600 | }
|
593 |
| - return builder.toString(); |
| 601 | + |
| 602 | + sequence = replaceURLs(builder.toString(), urlSubBox, false); |
| 603 | + |
| 604 | + return sequence; |
594 | 605 | }
|
595 | 606 |
|
596 | 607 | private String handleQuote(@Nonnull String sequence)
|
@@ -630,4 +641,56 @@ public enum SanitizationStrategy
|
630 | 641 | */
|
631 | 642 | ESCAPE,
|
632 | 643 | }
|
| 644 | + |
| 645 | + //URL Handling Methods |
| 646 | + |
| 647 | + private String[][] getURLSubBox(@Nonnull String sequence){ |
| 648 | + int urlCount = countURLs(); |
| 649 | + String[][] urlSubBox = new String[2][urlCount]; |
| 650 | + urlReplacementIteration = 10000000; |
| 651 | + urlMatcher.reset(); |
| 652 | + for(int i = 0; i <= urlCount - 1; i++){ |
| 653 | + //System.out.println(i); |
| 654 | + urlSubBox[1][i] = generateURLReplacement(sequence); |
| 655 | + urlMatcher.find(); |
| 656 | + urlSubBox[0][i] = urlMatcher.group(); |
| 657 | + } |
| 658 | + return urlSubBox; |
| 659 | + } |
| 660 | + |
| 661 | + private String replaceURLs(@Nonnull String sequence, String[][] subBox, boolean start){ |
| 662 | + StringBuilder builder = new StringBuilder(sequence); |
| 663 | + String url; |
| 664 | + for(int i = 0; i <= subBox[1].length - 1; i++){ |
| 665 | + url = subBox[start ? 0 : 1][i]; |
| 666 | + builder.replace(builder.indexOf(url), builder.indexOf(url) + url.length(), subBox[start ? 1 : 0][i]); |
| 667 | + } |
| 668 | + return builder.toString(); |
| 669 | + } |
| 670 | + |
| 671 | + private int countURLs(){ //Returns true if there is a URL in the sequence |
| 672 | + try { |
| 673 | + int i = 0; |
| 674 | + while(urlMatcher.find()){ |
| 675 | + i++; |
| 676 | + } |
| 677 | + return i; |
| 678 | + } catch (RuntimeException e) { |
| 679 | + return 0; |
| 680 | + } |
| 681 | + } |
| 682 | + |
| 683 | + private void setUrlMatcher(@Nonnull String sequence){ |
| 684 | + String urlPattern = "(http|https)://[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,3}(/\\S*)?"; |
| 685 | + urlMatcher = Pattern.compile(urlPattern).matcher(sequence); |
| 686 | + } |
| 687 | + |
| 688 | + private String generateURLReplacement(@Nonnull String sequence){ |
| 689 | + String urlReplacement = "URL-" + (urlReplacementIteration); |
| 690 | + if(sequence.contains(urlReplacement)){ |
| 691 | + urlReplacementIteration++; |
| 692 | + return generateURLReplacement(sequence); |
| 693 | + } |
| 694 | + else return urlReplacement; |
| 695 | + } |
633 | 696 | }
|
0 commit comments