Skip to content

Commit

Permalink
Fix: htmllinkparser is not evaluating targetUrl
Browse files (browse the repository at this point in the history)
  • Loading branch information
nunnun committed Mar 18, 2018
1 parent 6d70ce6 commit e08dc5c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
13 changes: 11 additions & 2 deletions crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class BlockCrawler extends EventEmitter {
return null;
}

if (!(shouldCrawl(URL.parse(context.url),allowedDomains))) {
if (!(shouldCrawl(URL.parse(absoluteTargetUrl),allowedDomains))) {
return null;
}

Expand All @@ -120,6 +120,7 @@ class BlockCrawler extends EventEmitter {
this.verbose = !argv.quiet;
this.proxyUri = argv.proxy;
this.redisserver = argv.redisserver;
this.debug = argv.debug;
var _allowed_domains = argv.allowed_domains;
if (undefined != _allowed_domains) {
try{
Expand All @@ -144,7 +145,7 @@ class BlockCrawler extends EventEmitter {
}
this.c = new supercrawler.Crawler(_crawleroptions);

console.log("Installed: " + this.c);
if(this.debug) console.log("Installed: " + this.c);

var _crawler = this;
this.c.addHandler("text/html", this._htmllinkparser({}));
Expand Down Expand Up @@ -179,6 +180,14 @@ class BlockCrawler extends EventEmitter {
console.log("Error: " + url + " (" + err.statusCode + ")");
});

this.c.on("crawlurl",function(url){
if(_crawler.debug) console.log("Now Crawling: " + url);
})

this.c.on("crawledurl",function(url,errorcode,statuscode){
if(_crawler.debug) console.log("Finished: " + url + ", " + statuscode);
})

var crwl = this.c;
this.c.on("urllistcomplete", function() {
console.log("Done");
Expand Down
4 changes: 2 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ const Base64 = require('js-base64').Base64;
const axios = require('axios');

const argv = require('yargs')
.option('quiet', {
alias: 'q',
.option('debug', {
alias: 'd',
type: 'boolean',
default: false
})
Expand Down

0 comments on commit e08dc5c

Please sign in to comment.