Skip to content

Commit

Permalink
Have force_https option
Browse files (browse the repository at this point in the history)
  • Loading branch information
nunnun committed Mar 18, 2018
1 parent e08dc5c commit 8a46147
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
8 changes: 7 additions & 1 deletion crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class BlockCrawler extends EventEmitter {
}
var shouldCrawl = this.shouldCrawl;
var allowedDomains = this.allowedDomains;
var forceHttps = this.forceHttps;
return function(context) {

var $;
Expand All @@ -93,7 +94,11 @@ class BlockCrawler extends EventEmitter {
targetHref = $this.attr("href");
absoluteTargetUrl = urlMod.resolve(context.url, targetHref);
urlObj = urlMod.parse(absoluteTargetUrl);
protocol = urlObj.protocol;
// Choose the protocol for the resolved link: when forceHttps is set, ignore
// the protocol parsed from the URL and use the fixed literal instead;
// otherwise keep whatever urlMod.parse() reported.
// NOTE(review): if urlMod is Node's `url` module, urlObj.protocol carries a
// trailing colon (e.g. 'https:'), while the forced literal below has none —
// confirm how `protocol` is consumed downstream so both branches agree.
if(forceHttps){
protocol = 'https'
}else{
protocol = urlObj.protocol;
}
hostname = urlObj.hostname;


Expand Down Expand Up @@ -121,6 +126,7 @@ class BlockCrawler extends EventEmitter {
this.proxyUri = argv.proxy;
this.redisserver = argv.redisserver;
this.debug = argv.debug;
this.forceHttps = argv.force_https;
var _allowed_domains = argv.allowed_domains;
if (undefined != _allowed_domains) {
try{
Expand Down
5 changes: 5 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ const argv = require('yargs')
type: 'string',
description: 'hostname:UrlPattern JSON object for allowed domains'
})
.option('force_https',{
type: 'boolean',
description: "Force https instead of http",
default: true
})
//.demandCommand(1)
.argv;

Expand Down

0 comments on commit 8a46147

Please sign in to comment.