Skip to content

Commit d2f7236

Browse files
committed
read me
1 parent d03792a commit d2f7236

File tree

7 files changed

+125
-33
lines changed

7 files changed

+125
-33
lines changed

PageSamples/FriendsList.html

+15-1
Large diffs are not rendered by default.

lib/modules/Parser.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ module.exports = {
1111
selectors: {
1212
errors: ['div:nth-child(2) div div:nth-child(1) > span'],
1313
userLikes: ['._55wp a:first-child strong', ' div:nth-child(2) > a:nth-child(1) a', 'h3.be a', ' div div div div div a:nth-child(1)'],
14-
userFriends: [' td.v.s:nth-child(2) a.ce', ' td.v.s:nth-child(2) a.bn'],
14+
userFriends: [' td:nth-child(2) a', 'td:nth-child(2) a', 'li:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1)'],
1515
total: ['div.t > a.u.v:nth-child(1)', 'div:nth-child(1) div:nth-child(2) div div:nth-child(1) > h3'],
1616
shares: ['div.v div.y a:nth-child(1)'],
1717
NextUrl: ['#m_more_friends a', 'table.i.j tr:nth-child(1) td div.e a']

lib/modules/getURL.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ var proxy = 'socks5://127.0.0.1:9050';
2020

2121
// settings
2222

23-
var baseurl = 'https://m.facebook.com/';
23+
var baseurl = 'https://facebookcorewwwi.onion/'; // 'https://m.facebook.com/'; //
2424
var puppeteerConf = {
2525
args: ['--proxy-server=' + proxy, '--ignore-certificate-errors', '--disable-setuid-sandbox', '--no-sandbox', '--disable-gpu', '--no-first-run', '--disable-setuid-sandbox=true', '--window-size=1920x1080', '--disable-accelerated-2d-canvas=true'],
2626
headless: true
@@ -88,7 +88,7 @@ var getUrl = function () {
8888

8989
// navigate to the website
9090
var response = await page.goto(fullurl, {
91-
timeout: 25000,
91+
timeout: 50000,
9292
waitUntil: 'networkidle0'
9393
});
9494

readme.md

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Facebook scraper
2+
3+
4+
### Dev notes
5+
6+
start:
7+
npm i
8+
9+
build:
10+
npm run build
11+
12+
Run:
13+
(node) node lib/app
14+
(pm2) pm2 start lib/app.js
15+
16+
#### Connections
17+
18+
src/modules/getURL.js
19+
20+
> const baseurl
21+
22+
Set the base address for the scraper. https://m.facebook.com is recommended; use https://facebookcorewwwi.onion if using Tor.
23+
24+
> const proxy
25+
26+
Set the proxy address, if you are using one. Note that the proxy port needs to be open on the server.
27+
28+
> const useProxy
29+
30+
Enable/disable proxy usage
31+
32+
#### Using Tor
33+
34+
If using Tor as the proxy, install Tor on the server/dev environment by following these instructions:
35+
36+
linux:
37+
https://www.torproject.org/docs/tor-doc-unix.html.en
38+
39+
OSX/MacOS:
40+
https://www.torproject.org/docs/tor-doc-osx.html.en
41+
42+
43+
#### Selectors
44+
45+
Data is scraped from the returned pages using CSS selectors.
46+
47+
src/modules/Parser.js -
48+
Add/edit selectors:
49+
> selectors
50+
51+
Add/edit search strings (for text search):
52+
> search
53+
54+
Each selector type can have multiple selector options to be tried when scraping.
55+
The options are tried in order; the result of the first selector that returns data is the one returned.
56+
57+
#### analyze selectors
58+
> /PageSamples
59+
60+
Page samples from returned data, to be used to analyze selectors and responses.
61+
62+
--------------------
63+
64+
65+
### Requests
66+
67+
get your cookie by:
68+
- log in to Facebook with your account
69+
- open the Network tab in the browser's developer tools
70+
- find any HTML resource call
71+
- find its request headers
72+
- copy the "Cookie" header
73+
74+
add your cookie at the end of every call, as in the sample:
75+
76+
https://serveraddress/endpoint?param=param&cookie=YourFacebookCookie
77+
78+
#### Current endpoints
79+
80+
- getLikes param: id / full url (everything after the domain name)
81+
- getShares param: id / full url (everything after the domain name)
82+
- getFrinds param: name / id
83+
84+
for load testing:
85+
- testPage param: (everything after the domain name)
86+
87+
88+
sample request:
89+
http://localhost:1984/getFrinds?name=bibi.netanyaho&cookie=fr=0kjkn887nHZ9qh.AWVUuzTatNKAV-2AHMaAa-yr7R0.Bax3uv.RT.AAA.0.0.BcWTvP.AWUM7xIU;%20sb=_J8CWsdsdsdslTIzFC8dv0vndc0s;%20datr=_J8CW6NvLHqAPlgT8HSFslLj;%20wd=1440x712;%kmkmkmk;%20xs=9%3AR_m5q%3A2%3A1549351887%3A20786%3A15166;%20pl=n
90+

src/app.js

-24
Original file line numberDiff line numberDiff line change
@@ -41,29 +41,5 @@ process.on('uncaughtException', function (err) {
4141
});
4242

4343

44-
// getUserFrinds(testData.user);
45-
46-
// getLikesFromID(null, testData.likesFullUrl);
47-
// getSharesFromID(testData.itemID);
48-
// testPageLoad('/story.php?story_fbid=2622896591058463&id=366827403332071');
49-
50-
// FB API samples
51-
// FBConn.getLikes(testData.itemID);
52-
// FBConn.getUser(testData.userID);
53-
54-
55-
/*
56-
const testData = {
57-
video: '/ufi/reaction/profile/browser/fetch/?limit=10&total_count=351&ft_ent_identifier=281112199228679',
58-
likesFullUrl: 'ufi/reaction/profile/browser/?ft_ent_identifier=2027002237395095',
59-
post: 'netanyahu/photos/a.10151681566507076/10156096314307076',
60-
likes: '2027002237395095',
61-
user: 'peter.huwel', //'moshe.dzanashvili' // 'michael.even.54' // MAYASR
62-
itemID: '366827403332071',
63-
userID: '550385403'
64-
};
65-
*/
66-
67-
6844

6945

src/modules/Parser.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ module.exports = {
55
selectors: {
66
errors:['div:nth-child(2) div div:nth-child(1) > span'],
77
userLikes: ['._55wp a:first-child strong', ' div:nth-child(2) > a:nth-child(1) a', 'h3.be a', ' div div div div div a:nth-child(1)'],
8-
userFriends: [' td.v.s:nth-child(2) a.ce', ' td.v.s:nth-child(2) a.bn'],
8+
userFriends: [' td:nth-child(2) a', 'td:nth-child(2) a', 'li:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1)'],
99
total: ['div.t > a.u.v:nth-child(1)', 'div:nth-child(1) div:nth-child(2) div div:nth-child(1) > h3'],
1010
shares: ['div.v div.y a:nth-child(1)'],
1111
NextUrl: ['#m_more_friends a', 'table.i.j tr:nth-child(1) td div.e a']

src/modules/getURL.js

+16-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import puppeteer from 'puppeteer';
22
import {cookie, userAgent} from '../../conf/conf';
33

4+
const useProxy = true; // set to use proxy by the settings below.
5+
const proxy = 'socks5://127.0.0.1:9050';
6+
7+
const baseurl = 'https://m.facebook.com/'; //'https://facebookcorewwwi.onion/';
8+
9+
/* Resource types to block from loading, for speed up and less resources */
410
const blockedResourceTypes = [
511
'image',
612
'media',
@@ -33,14 +39,12 @@ const skippedResources = [
3339
'tiqcdn',
3440
];
3541

36-
const proxy = 'socks5://127.0.0.1:9050';
3742

3843
// settings
3944

40-
const baseurl = 'https://m.facebook.com/';
45+
4146
const puppeteerConf = {
4247
args: [
43-
`--proxy-server=` + proxy,
4448
`--ignore-certificate-errors`,
4549
'--disable-setuid-sandbox',
4650
'--no-sandbox',
@@ -53,12 +57,20 @@ const puppeteerConf = {
5357
headless: true
5458
};
5559

60+
if (useProxy){
61+
puppeteerConf.args.push(
62+
`--proxy-server=` + proxy
63+
)
64+
}
65+
5666

5767
class getUrl {
5868

5969
async init() {
6070
console.log('init...');
6171
this.cookie = cookie;
72+
73+
// test connection to facebook
6274
const test = await this.loadURL('');
6375
if (test && typeof test === 'string' && test.indexOf('<') === 0) {
6476
console.log('Init connection test passed...');
@@ -109,7 +121,7 @@ class getUrl {
109121

110122
// navigate to the website
111123
const response = await page.goto(fullurl, {
112-
timeout: 25000,
124+
timeout: 50000,
113125
waitUntil: 'networkidle0',
114126
});
115127

0 commit comments

Comments
 (0)