Skip to content

Commit e41eb25

Browse files
authored
Implement Fapwiz Ripper (#2086)
* Implement FapwizRipper
1 parent 647de8f commit e41eb25

File tree

2 files changed

+316
-0
lines changed

2 files changed

+316
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package com.rarchives.ripme.ripper.rippers;
2+
3+
import java.io.IOException;
4+
import java.net.MalformedURLException;
5+
import java.net.URL;
6+
import java.util.ArrayList;
7+
import java.util.List;
8+
import java.util.regex.Matcher;
9+
import java.util.regex.Pattern;
10+
11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.jsoup.nodes.Document;
14+
import org.jsoup.select.Elements;
15+
16+
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
17+
import com.rarchives.ripme.utils.Http;
18+
19+
public class FapwizRipper extends AbstractHTMLRipper {
20+
21+
private static final Logger logger = LogManager.getLogger(FapwizRipper.class);
22+
23+
private static final Pattern CATEGORY_PATTERN = Pattern.compile("https?://fapwiz.com/category/([a-zA-Z0-9_-]+)/?$");
24+
25+
private static final Pattern USER_PATTERN = Pattern.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/?$");
26+
27+
// Note that the last part of the pattern can contain unicode emoji which
28+
// get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters.
29+
private static final Pattern POST_PATTERN = Pattern
30+
.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$");
31+
32+
/**
 * Creates a ripper for the given URL by delegating to the superclass constructor.
 *
 * @param url the page to rip
 * @throws IOException if the superclass constructor fails
 */
public FapwizRipper(URL url) throws IOException {
    super(url);
}
35+
36+
@Override
37+
public String getHost() {
38+
return "fapwiz";
39+
}
40+
41+
@Override
42+
public String getDomain() {
43+
return "fapwiz.com";
44+
}
45+
46+
@Override
47+
public String getGID(URL url) throws MalformedURLException {
48+
Matcher m;
49+
50+
m = CATEGORY_PATTERN.matcher(url.toExternalForm());
51+
if (m.matches()) {
52+
return "category_" + m.group(1);
53+
}
54+
55+
m = USER_PATTERN.matcher(url.toExternalForm());
56+
if (m.matches()) {
57+
return "user_" + m.group(1);
58+
}
59+
60+
m = POST_PATTERN.matcher(url.toExternalForm());
61+
if (m.matches()) {
62+
return "post_" + m.group(1) + "_" + m.group(2);
63+
}
64+
65+
throw new MalformedURLException("Expected fapwiz URL format: " +
66+
"fapwiz.com/USER or fapwiz.com/USER/POST or " +
67+
"fapwiz.com/CATEGORY - got " + url + " instead");
68+
}
69+
70+
void processUserOrCategoryPage(Document doc, List<String> results) {
71+
// The category page looks a lot like the structure of a user page,
72+
// so processUserPage is written to be compatible with both.
73+
doc.select(".post-items-holder img").forEach(e -> {
74+
String imgSrc = e.attr("src");
75+
76+
// Skip the user profile picture thumbnail insets
77+
if (imgSrc.endsWith("-thumbnail-icon.jpg")) {
78+
return;
79+
}
80+
81+
// Replace -thumbnail.jpg with .mp4
82+
String videoSrc = imgSrc.replace("-thumbnail.jpg", ".mp4");
83+
results.add(videoSrc);
84+
});
85+
}
86+
87+
void processCategoryPage(Document doc, List<String> results) {
88+
logger.info("Processing category page: " + url);
89+
processUserOrCategoryPage(doc, results);
90+
}
91+
92+
void processUserPage(Document doc, List<String> results) {
93+
logger.info("Processing user page: " + url);
94+
processUserOrCategoryPage(doc, results);
95+
}
96+
97+
void processPostPage(Document doc, List<String> results) {
98+
logger.info("Processing post page: " + url);
99+
doc.select("video source").forEach(video -> {
100+
results.add(video.attr("src"));
101+
});
102+
}
103+
104+
@Override
105+
public List<String> getURLsFromPage(Document doc) {
106+
List<String> results = new ArrayList<>();
107+
Matcher m;
108+
109+
m = CATEGORY_PATTERN.matcher(url.toExternalForm());
110+
if (m.matches()) {
111+
processCategoryPage(doc, results);
112+
}
113+
114+
m = USER_PATTERN.matcher(url.toExternalForm());
115+
if (m.matches()) {
116+
processUserPage(doc, results);
117+
}
118+
119+
m = POST_PATTERN.matcher(url.toExternalForm());
120+
if (m.matches()) {
121+
processPostPage(doc, results);
122+
}
123+
124+
return results;
125+
}
126+
127+
private Document getDocument(String url, int retries) throws IOException {
128+
return Http.url(url).userAgent(USER_AGENT).retries(retries).get();
129+
}
130+
131+
private Document getDocument(String url) throws IOException {
132+
return getDocument(url, 1);
133+
}
134+
135+
@Override
136+
public Document getNextPage(Document page) throws IOException {
137+
logger.info("Getting next page for url: " + url);
138+
Elements next = page.select("a.next");
139+
if (!next.isEmpty()) {
140+
String href = next.attr("href");
141+
logger.info("Found next page: " + href);
142+
return getDocument(href);
143+
} else {
144+
logger.info("No more pages");
145+
throw new IOException("No more pages.");
146+
}
147+
}
148+
149+
@Override
150+
public void downloadURL(URL url, int index) {
151+
sleep(2000);
152+
addURLToDownload(url, getPrefix(index));
153+
}
154+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
package com.rarchives.ripme.tst.ripper.rippers;
2+
3+
import java.io.IOException;
4+
import java.net.URI;
5+
import java.net.URISyntaxException;
6+
import java.net.URL;
7+
8+
import org.jsoup.nodes.Document;
9+
import org.junit.jupiter.api.Assertions;
10+
import org.junit.jupiter.api.Tag;
11+
import org.junit.jupiter.api.Test;
12+
13+
import com.rarchives.ripme.ripper.AbstractRipper;
14+
import com.rarchives.ripme.ripper.rippers.FapwizRipper;
15+
import com.rarchives.ripme.utils.Http;
16+
import com.rarchives.ripme.utils.Utils;
17+
18+
public class FapwizRipperTest extends RippersTest {
19+
@Test
20+
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
21+
public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException {
22+
URL url = new URI("https://fapwiz.com/alison-esha/").toURL();
23+
FapwizRipper ripper = new FapwizRipper(url);
24+
25+
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
26+
try {
27+
ripper.getNextPage(firstPage);
28+
// If we don't throw, we failed the text because there *was* a next
29+
// page even though there shouldn't be.
30+
Assertions.fail();
31+
} catch (IOException exception) {
32+
Assertions.assertTrue(true);
33+
}
34+
}
35+
36+
@Test
37+
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
38+
public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException {
39+
URL url = new URI("https://fapwiz.com/miaipanema/").toURL();
40+
FapwizRipper ripper = new FapwizRipper(url);
41+
42+
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
43+
try {
44+
Document doc = ripper.getNextPage(firstPage);
45+
Assertions.assertNotNull(doc);
46+
} catch (IOException exception) {
47+
// We should have found a next page but didn't.
48+
Assertions.fail();
49+
}
50+
}
51+
52+
@Test
53+
public void testRipPost() throws IOException, URISyntaxException {
54+
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
55+
FapwizRipper ripper = new FapwizRipper(url);
56+
testRipper(ripper);
57+
}
58+
59+
@Test
60+
public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException {
61+
URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL();
62+
FapwizRipper ripper = new FapwizRipper(url);
63+
testRipper(ripper);
64+
}
65+
66+
@Test
67+
public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException {
68+
URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/")
69+
.toURL();
70+
FapwizRipper ripper = new FapwizRipper(url);
71+
testRipper(ripper);
72+
}
73+
74+
@Test
75+
public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException {
76+
URL url = new URI(
77+
"https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/")
78+
.toURL();
79+
FapwizRipper ripper = new FapwizRipper(url);
80+
testRipper(ripper);
81+
}
82+
83+
@Test
84+
public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
85+
URL url = new URI(
86+
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
87+
.toURL();
88+
FapwizRipper ripper = new FapwizRipper(url);
89+
testRipper(ripper);
90+
}
91+
92+
// TODO Test rip user
93+
94+
// TODO Test rip category
95+
96+
@Test
97+
public void testPostGetGID1_Simple() throws IOException, URISyntaxException {
98+
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
99+
FapwizRipper ripper = new FapwizRipper(url);
100+
Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url));
101+
}
102+
103+
// Test Post pages GetGID
104+
105+
@Test
106+
public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
107+
URL url = new URI(
108+
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
109+
.toURL();
110+
FapwizRipper ripper = new FapwizRipper(url);
111+
112+
// In this case the filesystem safe version of the GID is
113+
// "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-"
114+
// but the GID doesn't truncate and doesn't remove non-filesystem-safe
115+
// characters.
116+
String gid = ripper.getGID(url);
117+
Assertions.assertEquals(
118+
"post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96",
119+
gid);
120+
121+
// Test directory name on disk (filesystem safe sanitized as the ripper will
122+
// do).
123+
String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid);
124+
Assertions.assertEquals(
125+
"fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-",
126+
directoryName);
127+
}
128+
129+
// Test User pages GetGID
130+
131+
@Test
132+
public void testUserGetGID1_Simple() throws IOException, URISyntaxException {
133+
// Test a "simple" username that is all letters.
134+
URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL();
135+
FapwizRipper ripper = new FapwizRipper(url);
136+
Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url));
137+
}
138+
139+
@Test
140+
public void testUserGetGID2_Numbers() throws IOException, URISyntaxException {
141+
// Test a more complex username that contains numbers.
142+
URL url = new URI("https://fapwiz.com/bimeat1998/").toURL();
143+
FapwizRipper ripper = new FapwizRipper(url);
144+
Assertions.assertEquals("user_bimeat1998", ripper.getGID(url));
145+
}
146+
147+
@Test
148+
public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException {
149+
// Test a more complex username that contains hyphens and numbers.
150+
URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL();
151+
FapwizRipper ripper = new FapwizRipper(url);
152+
Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url));
153+
}
154+
155+
@Test
156+
public void testUserGetGID4_Underscores() throws IOException, URISyntaxException {
157+
// Test a more complex username that contains underscores.
158+
URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL();
159+
FapwizRipper ripper = new FapwizRipper(url);
160+
Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url));
161+
}
162+
}

0 commit comments

Comments
 (0)