forked from AlexDmr/tacthab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextracteur.js
96 lines (89 loc) · 2.96 KB
/
extracteur.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Declared outside the define() callback; startExtraction's completion
// callback stores the finished extraction result here.
var last_result = null;
define ( [ './js/utils.js'
, './js/domReady.js'
]
, function(utils, domReady) {
// Shared DOMParser used to turn each fetched response text into a document.
var parser = new DOMParser();
// Accumulator for the extraction in progress: reset by startExtraction and
// filled with `sectionTitle` and `L_topics_JSON` by ExtractFromSection.
var result = {};
/**
 * Entry point of an extraction run.
 *
 * Resets the shared `result` accumulator, then walks the section found at
 * `url`. When the walk is over, the accumulated result is logged and stored
 * in `last_result`.
 *
 * @param {string} url - Address of the first page of the section to extract.
 */
function startExtraction(url) {
  // Every run starts from an empty accumulator.
  result = {};

  // Invoked once the whole section (all pages, all topics) has been walked.
  var publishResult = function() {
    console.log("Result:", result);
    last_result = result;
  };

  ExtractFromSection(url, publishResult);
}
/**
 * Return the "scheme://host[:port]" prefix of an absolute URL.
 *
 * Works for any scheme length (http, https, ...) and for URLs that have no
 * path after the host. For input without a scheme it falls back to cutting
 * at the first '/' found at or after index 7, and returns the input
 * unchanged when there is no such '/'.
 *
 * @param {string} url
 * @returns {string}
 */
function getServerRoot(url) {
  var match = /^[a-z][a-z0-9+.\-]*:\/\/[^\/?#]+/i.exec(url);
  if (match) {
    return match[0];
  }
  var slash = url.indexOf('/', 7);
  // indexOf returns -1 when nothing is found; slicing with -1 would silently
  // drop the last character, so keep the whole string instead.
  return slash === -1 ? url : url.slice(0, slash);
}

/**
 * Load one page of a section and start walking the topics it lists.
 *
 * The page is requested with utils.XHR('POST', './proxy'), passing the target
 * address as `variables.url` (presumably the proxy fetches it server-side --
 * TODO confirm against the proxy implementation). The response text is parsed
 * as HTML; on the first page the section title and the topic array are
 * created on the shared `result`, then ExtractFromSectionDoc takes over.
 *
 * @param {string} url - Address of the section page to load.
 * @param {Function} [next] - Forwarded to ExtractFromSectionDoc, which calls
 *        it once the whole section has been walked.
 */
function ExtractFromSection(url, next) {
  var server = getServerRoot(url);
  utils.XHR( 'POST', './proxy'
           , { onload : function() {
                 var doc = parser.parseFromString(this.responseText, "text/html");
                 var L = doc.querySelectorAll('ul.topiclist.topics a.topictitle');
                 if(typeof result.sectionTitle === "undefined") {
                   // A page without the expected heading must not abort the
                   // extraction; leave the title unset so a later page can
                   // still provide it.
                   var heading = doc.querySelector("#page-body h2");
                   if(heading) {
                     result.sectionTitle = heading.innerText;
                   }
                 }
                 if(typeof result.L_topics_JSON === "undefined") {
                   result.L_topics_JSON = [];
                 }
                 ExtractFromSectionDoc(doc, server, L, 0, result.L_topics_JSON, next);
               }
             , variables : {url: url}
             }
           );
}
/**
 * Walk the topic links of one already-parsed section page, one at a time.
 *
 * For the link at index `i` a `{topic, posts}` record is appended to
 * `L_topics_JSON` and filled by ExtractFromTopic; when that completes the
 * function re-enters itself with the next index. Once the index is past the
 * end of the list (including the case of a page with no topic link at all),
 * the page's 'a.right-box.right' link, if any, is followed through
 * ExtractFromSection; otherwise the section is finished and `next` is called.
 *
 * @param {Document} doc - Parsed section page (queried for the next-page link).
 * @param {string} server - Prefix prepended, with a '/', to every href.
 * @param {NodeList} L_topics - Topic link elements of this page.
 * @param {number} i - Index of the topic to process.
 * @param {Array} L_topics_JSON - Output array receiving one record per topic.
 * @param {Function} [next] - Called when the last page has been processed;
 *        when omitted, `L_topics_JSON` is logged instead.
 */
function ExtractFromSectionDoc(doc, server, L_topics, i, L_topics_JSON, next) {
  // Nothing (left) to process on this page: move on to the next page or stop.
  // Checking up front also protects against an empty list, where item(0)
  // would be null.
  if(i >= L_topics.length) {
    var right = doc.querySelector('a.right-box.right');
    if(right) {
      var nextURL = server + '/' + right.getAttribute('href');
      ExtractFromSection(nextURL, next);
      return;
    }
    console.log("End of section");
    if(next) {
      next();
    } else {
      console.log("L_topics_JSON:", L_topics_JSON);
    }
    return;
  }

  var link  = L_topics.item(i);
  var url   = server + '/' + link.getAttribute('href');
  // Named `topic` so it does not shadow the module-level `result` accumulator.
  var topic = { topic : link.innerText
              , posts : []
              };
  console.log(topic.topic);
  L_topics_JSON.push( topic );
  ExtractFromTopic( url, server, topic
                  , function() {
                      ExtractFromSectionDoc(doc, server, L_topics, i + 1, L_topics_JSON, next);
                    }
                  );
}
/**
 * Collect the text of every post of a topic, following its pagination.
 *
 * The page at `url` is requested with utils.XHR('POST', './proxy') and parsed
 * as HTML. The innerText of each 'div.post div.postbody div.content' element
 * is appended to `result.posts`. If the page has an 'a.right-box.right' link,
 * the function calls itself on that link; otherwise `next` is called.
 *
 * @param {string} url - Address of the topic page to load.
 * @param {string} server - Prefix prepended, with a '/', to the next-page href.
 * @param {{posts: Array}} result - Topic record whose `posts` array is filled.
 * @param {Function} next - Called after the last page has been processed.
 */
function ExtractFromTopic(url, server, result, next) {
  // Kept as a regular function: the response is read from `this`.
  var handlePage = function() {
    var page   = parser.parseFromString(this.responseText, "text/html");
    var bodies = page.querySelectorAll('div.post div.postbody div.content');
    console.log("\t", bodies.length, "posts for", url);

    var k = 0;
    while(k < bodies.length) {
      result.posts.push( bodies.item(k).innerText );
      k += 1;
    }

    var nextLink = page.querySelector('a.right-box.right');
    if(!nextLink) {
      next();
      return;
    }
    ExtractFromTopic(server + '/' + nextLink.getAttribute('href'), server, result, next);
  };

  utils.XHR('POST', './proxy', { onload : handlePage, variables : {url: url} });
}
// Once the DOM is ready, make a click on the element with id "process"
// start an extraction using the current value of the element with id "URL".
domReady( function() {
  var trigger = document.getElementById("process");
  trigger.onclick = function() {
    var field = document.getElementById("URL");
    startExtraction( field.value );
  };
});
return startExtraction;
}); // FIN