diff --git a/README.md b/README.md index 9feb35ad..66fc9369 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Options: # Required Setup: * Python 2.7 (because bunch of dependencies do not support Python 3.0) * Bunch of python libraries (use requirements.txt) +* [PhantomJS](http://phantomjs.org), [Firefox](https://www.mozilla.org/firefox) or [Chrome](https://www.google.com/chrome) (to assist in rendering websites that are JavaScript heavy) * In **Kali Linux**, please install the requirements using the command `pip install --upgrade --force-reinstall -r requirements.txt` ## Detailed Tool Documentation: diff --git a/docs/index.md b/docs/index.md index f115ebca..9be0127b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -27,7 +27,9 @@ People can either write modules for DataSploit or can simpley import datasploit Worried about setup? We got you. You should be worried about two things: -* Install the required python dependencies. Either use requirements.txt or simpley pip install datasploit. +* Install dependencies + - Python dependencies: Either use requirements.txt (`pip install -r requirements.txt`) if installed with `git clone` or simply `pip install datasploit` to install everything needed as a library. + - System dependencies: PhantomJS, Chrome or Firefox needs to be installed for certain modules to render JavaScript-heavy websites. * Feeding specific API keys for few specific sources. We are going to have a knowledge base where step by step instructions to generate these API keys will be documented. Sweet deal? 
* [Click here to check step by step setup guide](/setupGuide/) diff --git a/domain/domain_history.py b/domain/domain_history.py index ff8fe69c..3124c49f 100755 --- a/domain/domain_history.py +++ b/domain/domain_history.py @@ -2,9 +2,10 @@ import base import sys -import requests from bs4 import BeautifulSoup import re +from selenium import webdriver +from selenium.common.exceptions import WebDriverException from termcolor import colored import time @@ -20,14 +21,31 @@ def netcraft_domain_history(domain): ip_history_dict = {} time.sleep(0.3) endpoint = "http://toolbar.netcraft.com/site_report?url=%s" % (domain) - req = requests.get(endpoint) - - soup = BeautifulSoup(req.content, 'html.parser') - urls_parsed = soup.findAll('a', href=re.compile(r'.*netblock\?q.*')) - for url in urls_parsed: - if urls_parsed.index(url) != 0: - ip_history_dict[str(url).split('=')[2].split(">")[1].split("<")[0]] = str(url.parent.findNext('td')).strip( - "").strip("") + # These `try` blocks could be in a for loop, but I wanted manual control + # over the order in which the webdrivers were chosen. 
+ driver = None + try: + webdriver.PhantomJS() + driver = webdriver.PhantomJS() + except WebDriverException: + try: + webdriver.Firefox().quit() + driver = webdriver.Firefox() + except WebDriverException: + try: + webdriver.Chrome().quit() + driver = webdriver.Chrome() + except WebDriverException: + ip_history_dict = { 'Error': 'No WebDriver Found!\nTry installing PhantomJS or adding the Chrome or Firefox binaries to your $PATH.'} + if driver: + driver.get(endpoint) + html = driver.page_source + soup = BeautifulSoup(html, 'html.parser') + urls_parsed = soup.findAll('a', href=re.compile(r'.*netblock\?q.*')) + for url in urls_parsed: + if urls_parsed.index(url) != 0: + ip_history_dict[url['href'].split('=')[1]] = url.get_text() + driver.quit() return ip_history_dict @@ -40,8 +58,16 @@ def main(domain): def output(data, domain=""): - for x in data.keys(): - print "%s: %s" % (data[x], x) + if len(data.keys()) > 0: + for x in data.keys(): + if 'Error' in x: + print data[x] + data[x] = '' + else: + print "%s: %s" % (data[x], x) + else: + print colored(style.BOLD + '\n[!] No previous domain owners found!\n' + + style.END, 'red') print "\n-----------------------------\n" diff --git a/requirements.txt b/requirements.txt index b7f0f977..2bfdfb11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,7 @@ python-whois pytz requests requests-file +selenium simplejson termcolor tld