Skip to content

Commit

Permalink
Merge pull request #2053 from RipMeApp/fix-deprecated-url-ctors
Browse files Browse the repository at this point in the history
Fix usage of deprecated URL constructors, which found a couple of issues
  • Loading branch information
soloturn authored Jan 2, 2025
2 parents 3b9352f + 692430c commit 24c441e
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 32 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ the following combinations of tags:
- testSlow runs tests with tag "slow".
- tests can be run by test class, or single test. Use "testAll" so it does
not matter if a test is tagged or not.
- tests print the full stack trace of a failed assertion, exception, or error if you pass `--info` to the command

```bash
./gradlew test
Expand All @@ -129,6 +130,7 @@ the following combinations of tags:
./gradlew testSlow
./gradlew testAll --tests XhamsterRipperTest
./gradlew testAll --tests XhamsterRipperTest.testXhamster2Album
./gradlew testAll --tests ChanRipperTest --info
```

Please note that some tests may fail as sites change and our rippers
Expand Down
55 changes: 33 additions & 22 deletions src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
* Simplified ripper, designed for ripping from sites by parsing HTML.
*/
public abstract class AbstractHTMLRipper extends AbstractRipper {

private final Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<>());
private final Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<>());
private final Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<>());
Expand Down Expand Up @@ -60,11 +60,15 @@ protected Document getCachedFirstPage() throws IOException, URISyntaxException {
/**
* Hook for fetching the page that comes after the given one.
* This base implementation ignores its argument and always returns {@code null}.
* NOTE(review): presumably rippers for paginated sites override this and
* {@code null} means "no further page" - confirm against the callers.
* @param doc
*      The current page (unused by this implementation)
* @return
*      Always {@code null} in this implementation
* @throws IOException
*      Never thrown by this implementation; it is part of the signature, so overrides may throw it
* @throws URISyntaxException
*      Never thrown by this implementation; it is part of the signature, so overrides may throw it
*/
public Document getNextPage(Document doc) throws IOException, URISyntaxException {
return null;
}
protected abstract List<String> getURLsFromPage(Document page) throws UnsupportedEncodingException;

protected abstract List<String> getURLsFromPage(Document page) throws UnsupportedEncodingException, URISyntaxException;

/**
* Hook for collecting descriptions from a page.
* This base implementation extracts nothing: it unconditionally throws.
* NOTE(review): presumably rippers that support descriptions override this - confirm.
* @param doc
*      The page to read descriptions from (unused by this implementation)
* @return
*      Never returns normally in this implementation
* @throws IOException
*      Always, with the message "getDescriptionsFromPage not implemented"
*/
protected List<String> getDescriptionsFromPage(Document doc) throws IOException {
throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function?
}

/**
* Handles a single URL found on a page; every concrete ripper must implement this.
* rip() calls this for each image URL it finds, passing a counter that it
* increments before each call.
* NOTE(review): what "download" entails (queueing vs. fetching synchronously)
* is up to the implementation and is not visible here - confirm.
* @param url
*      The URL to handle
* @param index
*      The counter value rip() assigned to this URL
*/
protected abstract void downloadURL(URL url, int index);

/**
* Accessor for a {@code DownloadThreadPool}.
* This base implementation always returns {@code null}.
* NOTE(review): presumably rippers that manage their own pool override this - confirm.
* @return
*      Always {@code null} in this implementation
*/
protected DownloadThreadPool getThreadPool() {
return null;
}
Expand Down Expand Up @@ -130,7 +134,7 @@ public void rip() throws IOException, URISyntaxException {
List<String> doclocation = new ArrayList<>();

LOGGER.info("Got doc location " + doc.location());

while (doc != null) {

LOGGER.info("Processing a doc...");
Expand Down Expand Up @@ -167,7 +171,7 @@ public void rip() throws IOException, URISyntaxException {
for (String imageURL : imageURLs) {
index += 1;
LOGGER.debug("Found image url #" + index + ": '" + imageURL + "'");
downloadURL(new URL(imageURL), index);
downloadURL(new URI(imageURL).toURL(), index);
if (isStopped() || isThisATest()) {
break;
}
Expand All @@ -182,19 +186,26 @@ public void rip() throws IOException, URISyntaxException {
if (isStopped() || isThisATest()) {
break;
}

textindex += 1;
LOGGER.debug("Getting description from " + textURL);
String[] tempDesc = getDescription(textURL,doc);

if (tempDesc != null) {
if (Utils.getConfigBoolean("file.overwrite", false) || !(new File(
workingDir.getCanonicalPath()
+ ""
+ File.separator
+ getPrefix(index)
+ (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL)))
+ ".txt").exists())) {
URL url = new URI(textURL).toURL();
String filename = fileNameFromURL(url);

boolean fileExists = new File(
workingDir.getCanonicalPath()
+ ""
+ File.separator
+ getPrefix(index)
+ (tempDesc.length > 1 ? tempDesc[1] : filename)
+ ".txt").exists();

if (Utils.getConfigBoolean("file.overwrite", false) || !fileExists) {
LOGGER.debug("Got description from " + textURL);
saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL))));
saveText(url, "", tempDesc[0], textindex, (tempDesc.length > 1 ? tempDesc[1] : filename));
sleep(descSleepTime());
} else {
LOGGER.debug("Description from " + textURL + " already exists.");
Expand Down Expand Up @@ -225,12 +236,12 @@ public void rip() throws IOException, URISyntaxException {
}
waitForThreads();
}

/**
* Gets the file name from the URL
* @param url
* @param url
* URL that you want to get the filename from
* @return
* @return
* Filename of the URL
*/
private String fileNameFromURL(URL url) {
Expand All @@ -244,7 +255,7 @@ private String fileNameFromURL(URL url) {
return saveAs;
}
/**
*
*
* @param url
* Target URL
* @param subdirectory
Expand All @@ -253,7 +264,7 @@ private String fileNameFromURL(URL url) {
* Text you want to save
* @param index
* Index in something like an album
* @return
* @return
* True if ripped successfully
* False if failed
*/
Expand Down Expand Up @@ -295,12 +306,12 @@ private boolean saveText(URL url, String subdirectory, String text, int index, S
}
return true;
}

/**
* Gets prefix based on where in the index it is
* @param index
* @param index
* The index in question
* @return
* @return
* Returns prefix for a file. (?)
*/
protected String getPrefix(int index) {
Expand All @@ -313,9 +324,9 @@ protected String getPrefix(int index) {

/*
* ------ Methods copied from AlbumRipper. ------
* This removes AlbumRipper's usage from this class.
* This removes AlbumRipper's usage from this class.
*/

/**
* Reports whether duplicates are allowed.
* This implementation always returns {@code false}.
* NOTE(review): what counts as a duplicate is decided by the callers,
* which are not visible here - confirm.
* @return
*      {@code false}
*/
protected boolean allowDuplicates() {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -94,7 +95,7 @@ public void rip() throws IOException, URISyntaxException {

index += 1;
LOGGER.debug("Found image url #" + index+ ": " + imageURL);
downloadURL(new URL(imageURL), index);
downloadURL(new URI(imageURL).toURL(), index);
}

if (isStopped() || isThisATest()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.rarchives.ripme.utils.RipUtils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
Expand Down Expand Up @@ -208,7 +209,7 @@ private boolean isURLBlacklisted(String url) {
return false;
}
@Override
public List<String> getURLsFromPage(Document page) {
public List<String> getURLsFromPage(Document page) throws URISyntaxException {
List<String> imageURLs = new ArrayList<>();
Pattern p; Matcher m;
for (Element link : page.select("a")) {
Expand Down Expand Up @@ -254,8 +255,8 @@ public List<String> getURLsFromPage(Document page) {
//Copied code from RedditRipper, getFilesFromURL should also implement stuff like flickr albums
URL originalURL;
try {
originalURL = new URL(href);
} catch (MalformedURLException e) {
originalURL = new URI(href).toURL();
} catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) {
continue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
Expand All @@ -27,10 +29,10 @@ public LusciousRipper(URL url) throws IOException {
}

@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
public URL sanitizeURL(URL url) throws MalformedURLException, URISyntaxException{
String URLToReturn = url.toExternalForm();
URLToReturn = URLToReturn.replaceAll("https?://(?:www\\.)?luscious\\.", "https://old.luscious.");
URL san_url = new URL(URLToReturn);
URL san_url = new URI(URLToReturn).toURL();
LOGGER.info("sanitized URL is " + san_url.toExternalForm());
return san_url;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public List<String> getURLsFromPage(Document doc) {
for (Element thumb : thumbs) {
String link = thumb.attr("src").replaceAll("thumbs/th_", "");
String imgSrc = "http://nude-gals.com/" + link;
imgSrc = imgSrc.replaceAll(" ", "%20");
imageURLs.add(imgSrc);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public List<String> getURLsFromPage(Document doc) {
String[] lines = e.html().split("\n");
for (String line : lines) {
if (line.contains("html5player.setVideoUrlHigh")) {
String videoURL = line.replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
String videoURL = line.strip().replaceAll("\t", "").replaceAll("html5player.setVideoUrlHigh\\(", "").replaceAll("\'", "").replaceAll("\\);", "");
results.add(videoURL);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -51,7 +53,7 @@ public String getGID(URL url) throws MalformedURLException {
}

@Override
public void rip() throws IOException {
public void rip() throws IOException, URISyntaxException {
LOGGER.info(" Retrieving " + this.url);
String html = Http.url(this.url).get().toString();
if (html.contains("__fileurl = '")) {
Expand All @@ -62,7 +64,7 @@ public void rip() throws IOException {
throw new IOException("Could not find video URL at " + url);
}
String vidUrl = vidUrls.get(0);
addURLToDownload(new URL(vidUrl), HOST + "_" + getGID(this.url));
addURLToDownload(new URI(vidUrl).toURL(), HOST + "_" + getGID(this.url));
waitForThreads();
}
}
}

0 comments on commit 24c441e

Please sign in to comment.