From 3febf12e2ca11aa356d9e58e8b0abbc7b21daad2 Mon Sep 17 00:00:00 2001 From: Administrator Date: Sun, 19 Feb 2017 18:15:29 -0800 Subject: [PATCH 1/4] Fixed bug related to UTF-8 in certain requests Fixed bug in domain_censys.py not printing the given ip as well --- active_default_file_check.py | 3 +- core/osint/domain_GooglePDF.py | 45 +++++++++++++++--------------- core/osint/domain_censys.py | 19 +++++++------ core/osint/domain_subdomains.py | 20 +++++++------- core/osint/domain_wikileaks.py | 4 +-- domainOsint.py | 49 +++++++++++++++++---------------- domain_GooglePDF.py | 45 +++++++++++++++--------------- domain_censys.py | 14 ++++++---- domain_forumsearch.py | 2 +- domain_pastes.py | 7 +++-- domain_subdomains.py | 18 ++++++------ domain_zoomeye.py | 4 +-- emailOsint.py | 20 +++++++------- usernameOsint.py | 27 +++++++++--------- username_gitscrape.py | 2 +- 15 files changed, 145 insertions(+), 134 deletions(-) diff --git a/active_default_file_check.py b/active_default_file_check.py index 9370d3c5..9aa3dc4e 100644 --- a/active_default_file_check.py +++ b/active_default_file_check.py @@ -1,8 +1,9 @@ import requests import re +import codecs import sys -list_urls = open("check_urls.txt") +list_urls = codecs.open("check_urls.txt", encoding='utf-8') existing_urls = [] host = sys.argv[1] base_url = "http://" + host + "/" diff --git a/core/osint/domain_GooglePDF.py b/core/osint/domain_GooglePDF.py index 262ccc0f..745b532f 100644 --- a/core/osint/domain_GooglePDF.py +++ b/core/osint/domain_GooglePDF.py @@ -1,37 +1,38 @@ -from bs4 import BeautifulSoup -import sys -import urllib2 -import re -import string +from bs4 import BeautifulSoup +import sys +import urllib2 +import re +import string from celery import shared_task from osint.utils import * ''' -This code is a bit messed up. Lists files from first page only. Needs a lot of modification. +This code is a bit messed up. Lists files from first page only. Needs a lot of modification. 
''' def googlesearch(query, ext): print query try: - google="https://www.google.co.in/search?filter=0&q=site:" + google="https://www.google.co.in/search?filter=0&q=site:" getrequrl="https://www.google.co.in/search?filter=0&num=100&q=%s&start=" % (query) - hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', - 'Accept-Encoding': 'none', - 'Accept-Language': 'en-US,en;q=0.8', - 'Connection': 'keep-alive'} - req=urllib2.Request(getrequrl, headers=hdr) - response=urllib2.urlopen(req) - data = response.read() - data=re.sub('','',data) - for e in ('>','=','<','\\','(',')','"','http',':','//'): - data = string.replace(data,e,' ') + hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', + 'Accept-Encoding': 'none', + 'Accept-Language': 'en-US,en;q=0.8', + 'Connection': 'keep-alive'} + req=urllib2.Request(getrequrl, headers=hdr) + response=urllib2.urlopen(req) + encoding = response.headers.getparam('charset') + data = response.read().encode(encoding) + data=re.sub('','',data) + for e in ('>','=','<','\\','(',')','"','http',':','//'): + data = string.replace(data,e,' ') - r1 = re.compile('[-_.a-zA-Z0-9.-_]*'+'\.'+ ext) - res = r1.findall(data) + r1 = re.compile('[-_.a-zA-Z0-9.-_]*'+'\.'+ ext) + res = r1.findall(data) if not res: print "No results were found" return [] @@ -52,7 +53,7 @@ def run(domain, taskId): return data if __name__ == "__main__": - domain=sys.argv[1] + domain=sys.argv[1] print "\t\t\t[+] PDF Files\n" result = run(domain) diff --git a/core/osint/domain_censys.py b/core/osint/domain_censys.py index 9968e7f5..05eb77a9 100644 --- a/core/osint/domain_censys.py +++ b/core/osint/domain_censys.py @@ -6,10 +6,9 @@ def censys_search(domain): + #censys_list = [] pages = float('inf') page = 1 - - #censys_list = [] while page <= pages: print "Parsed and collected results from page %s" % (str(page)) params = {'query' : domain, 'page' : page} @@ -24,17 +23,19 @@ def censys_search(domain): proto = r["protocols"] proto = [p.split("/")[0] for p in proto] proto.sort(key=float) - protoList = ','.join(map(str, proto)) + protoList = ','.join(map(str, proto)) temp_dict["ip"] = ip - temp_dict["protocols"] = protoList - + temp_dict["protocols"] = protoList + #print '[%s] IP: %s - aaProtocols: %s' % (colored('*', 'red'), ip, protoList) - + + print temp_dict if '80' in protoList: new_dict = view(ip, temp_dict) censys_list.append(new_dict) else: + censys_list.append(temp_dict) pages = payload['metadata']['pages'] @@ -47,8 +48,7 @@ def censys_search(domain): def view(server, temp_dict): res = requests.get("https://www.censys.io/api/v1/view/ipv4/%s" % (server), auth = (cfg.censysio_id, cfg.censysio_secret)) - payload = res.json() - + payload = res.json() try: if 'title' in payload['80']['http']['get'].keys(): #print "[+] Title: %s" % payload['80']['http']['get']['title'] @@ -56,7 +56,7 @@ def view(server, temp_dict): temp_dict['title'] = title if 'server' in payload['80']['http']['get']['headers'].keys(): header = "[+] Server: %s" % payload['80']['http']['get']['headers']['server'] - temp_dict["server_header"] = payload['80']['http']['get']['headers']['server'] + temp_dict["server_header"] = 
payload['80']['http']['get']['headers']['server'] return temp_dict except Exception as error: @@ -68,6 +68,7 @@ def view(server, temp_dict): def main(): domain = sys.argv[1] + print "Starting censys on : " + domain censys_search(domain) for x in censys_list: print x diff --git a/core/osint/domain_subdomains.py b/core/osint/domain_subdomains.py index 29176386..e24845be 100644 --- a/core/osint/domain_subdomains.py +++ b/core/osint/domain_subdomains.py @@ -1,6 +1,6 @@ import sys import json -import requests +import requests from bs4 import BeautifulSoup import re from domain_pagelinks import pagelinks @@ -28,7 +28,7 @@ def subdomains(domain): headers = {} headers['Referer'] = "https://dnsdumpster.com/" req = requests.post("https://dnsdumpster.com/", data = data, cookies = cookies, headers = headers) - soup = BeautifulSoup(req.content, 'lxml') + soup = BeautifulSoup(req.content.encode('utf-8'), 'lxml') subdomains=soup.findAll('td',{"class":"col-md-4"}) for subd in subdomains: if domain in subd.text: @@ -63,7 +63,7 @@ def find_subdomains_from_wolfram(domain): if recalculate != "": recalc_code = json.loads(req1.content)['queryresult']['recalculate'].split("=")[1].split("&")[0] - + #third request to get calc_id #print "http://www.wolframalpha.com/input/json.jsp?action=recalc&format=image,plaintext,imagemap,minput,moutput&id=%s&output=JSON&output=JSON&scantimeout=10&statemethod=deploybutton&storesubpodexprs=true" % (recalc_code) req2 = requests.get("http://www.wolframalpha.com/input/json.jsp?action=recalc&format=image,plaintext,imagemap,minput,moutput&id=%s&output=JSON&output=JSON&scantimeout=10&statemethod=deploybutton&storesubpodexprs=true" % (recalc_code), headers=headers, proxies=proxies) @@ -71,7 +71,7 @@ def find_subdomains_from_wolfram(domain): for x in pods: if "Web statistics for" in x['title']: async_code = x['async'].split('=')[1] - + #fourth request to get id for subdomains. req3 = requests.get("http://www.wolframalpha.com/input/json.jsp?action=asyncPod&format=image,plaintext,imagemap,minput,moutput&formattimeout=20&id=%s&output=JSON&podtimeout=20&statemethod=deploybutton&storesubpodexprs=true" % (async_code), headers=headers, proxies=proxies) for x in json.loads(req3.content)['pods'][0]['deploybuttonstates']: @@ -80,7 +80,7 @@ def find_subdomains_from_wolfram(domain): sub_code = x['input'] else: pass - + #fifth request to find few subdomains url = "http://www.wolframalpha.com/input/json.jsp?async=false&dbid=%s&format=image,plaintext,imagemap,sound,minput,moutput&includepodid=WebSiteStatisticsPod:InternetData&input=%s&output=JSON&podTitle=Web+statistics+for+all+of+%s&podstate=%s&s=%s&statemethod=deploybutton&storesubpodexprs=true&text=Subdomains" % (sub_code, domain, domain, sub_code, server_value) req4 = requests.get(url, headers = headers, proxies = proxies) @@ -97,9 +97,9 @@ def find_subdomains_from_wolfram(domain): else: more_code = "blank_bro" - #wooh, final request bitch. + #wooh, final request bitch. 
url = "http://www.wolframalpha.com/input/json.jsp?async=false&dbid=%s&format=image,plaintext,imagemap,sound,minput,moutput&includepodid=WebSiteStatisticsPod:InternetData&input=%s&output=JSON&podTitile=Subdomains&podstate=%s&s=%s&statemethod=deploybutton&storesubpodexprs=true&text=More" % (more_code, domain, more_code, servervalue_for_more) - req5 = requests.get(url, headers = headers, proxies = proxies) + req5 = requests.get(url, headers = headers, proxies = proxies) for x in json.loads(req5.content)['queryresult']['subpods']: if x['title'] == "Subdomains": temp_subdomain_list = x['plaintext'].split("\n") @@ -129,7 +129,7 @@ def subdomains_from_netcraft(domain): link_regx = re.compile('') links_list = link_regx.findall(req1.content) for x in links_list: - dom_name = x.split("/")[2].split(".") + dom_name = x.split("/")[2].split(".") if (dom_name[len(dom_name) - 1] == target_dom_name[1]) and (dom_name[len(dom_name) - 2] == target_dom_name[0]): check_and_append_subdomains(x.split("/")[2]) num_regex = re.compile('Found (.*) site') @@ -151,7 +151,7 @@ def subdomains_from_netcraft(domain): link_regx = re.compile('') links_list = link_regx.findall(req2.content) for y in links_list: - dom_name1 = y.split("/")[2].split(".") + dom_name1 = y.split("/")[2].split(".") if (dom_name1[len(dom_name1) - 1] == target_dom_name[1]) and (dom_name1[len(dom_name1) - 2] == target_dom_name[0]): check_and_append_subdomains(y.split("/")[2]) last_item = links_list[len(links_list) - 1].split("/")[2] @@ -174,7 +174,7 @@ def run(domain, taskId): subdomains_from_netcraft(odomain) save_record(domain, taskId, "Subdomains", subdomain_list) return subdomain_list - + def main(): diff --git a/core/osint/domain_wikileaks.py b/core/osint/domain_wikileaks.py index 86aadda3..111125c3 100644 --- a/core/osint/domain_wikileaks.py +++ b/core/osint/domain_wikileaks.py @@ -10,7 +10,7 @@ def wikileaks(domain, taskId): req = requests.get('https://search.wikileaks.org/?query=&exact_phrase=%s&include_external_sources=True&order_by=newest_document_date'%(domain)) soup=BeautifulSoup(req.content, "lxml") count=soup.findAll('div',{"class":"total-count"}) - print "Total "+count[0].text + print "Total "+count[0].text.encode('utf-8') divtag=soup.findAll('div',{'class':'result'}) links={} for a in divtag: @@ -28,7 +28,7 @@ def main(): print "%s (%s)" % (lnk, tl) print "For all results, visit: "+ 'https://search.wikileaks.org/?query=&exact_phrase=%s&include_external_sources=True&order_by=newest_document_date'%(domain) print "\n-----------------------------\n" - + #if __name__ == "__main__": diff --git a/domainOsint.py b/domainOsint.py index 87c416d9..1601c552 100755 --- a/domainOsint.py +++ b/domainOsint.py @@ -4,7 +4,8 @@ import whois import requests import socket -import sys +import codecs +import sys import json from Wappalyzer import Wappalyzer, WebPage from bs4 import BeautifulSoup @@ -104,23 +105,23 @@ def printart(): def do_everything(domain): dict_to_apend['targetname'] = domain - + API_URL = "https://www.censys.io/api/v1" #print cfg.zoomeyeuser - + #print WhoIs information whoisdata = whoisnew(domain) print whoisdata dict_to_apend['whois'] = whoisdata - + #print DNS Information dns_records = parse_dns_records(domain) - #dict_to_apend['dns_records'] = dns_records > not working + #dict_to_apend['dns_records'] = dns_records > not working #bson.errors.InvalidDocument: Cannot encode object: - + for x in dns_records.keys(): print x if "No" in dns_records[x] and "Found" in dns_records[x]: @@ -129,7 +130,7 @@ def do_everything(domain): for y in 
dns_records[x]: print "\t%s" % (y) #print type(dns_records[x]) - + print colored(style.BOLD + '\n---> Finding Paste(s)..\n' + style.END, 'blue') if cfg.google_cse_key != "" and cfg.google_cse_key != "XYZ" and cfg.google_cse_cx != "" and cfg.google_cse_cx != "XYZ": total_results = google_search(domain, 1) @@ -137,7 +138,7 @@ def do_everything(domain): more_iters = (total_results / 10) if more_iters >= 10: print colored(style.BOLD + '\n---> Too many results, Daily API limit might exceed\n' + style.END, 'red') - for x in xrange(1,more_iters + 1): + for x in xrange(1,more_iters + 1): google_search(domain, (x*10)+1) print "\n\n-----------------------------\n" else: @@ -184,26 +185,26 @@ def do_everything(domain): except: print "[-] HTTPS connection was unavailable" wappalyze_results['https'] = [] - + if len(wappalyze_results.keys()) >= 1: dict_to_apend['wappalyzer'] = wappalyze_results - + #make Search github code for the given domain. - + git_results = github_search(domain, 'Code') if git_results is not None: print git_results else: print colored("Sad! Nothing found on github", 'red') - - #collecting emails for the domain and adding information in master email list. + + #collecting emails for the domain and adding information in master email list. if cfg.emailhunter != "": emails = emailhunter(domain) if len(collected_emails) >= 1: for x in collected_emails: - print str(x) + print str(x) dict_to_apend['email_ids'] = collected_emails @@ -212,7 +213,7 @@ def do_everything(domain): while True: a = raw_input(colored("\n\nDo you want to launch osint check for these emails? [(Y)es/(N)o/(S)pecificEmail]: ", 'red')) - if a.lower() =="yes" or a.lower() == "y": + if a.lower() =="yes" or a.lower() == "y": for x in collected_emails: print "Checking for %s" % x print_emailosint(x) @@ -235,7 +236,7 @@ def do_everything(domain): ''' - + dns_ip_history = netcraft_domain_history(domain) if len(dns_ip_history.keys()) >= 1: for x in dns_ip_history.keys(): @@ -248,16 +249,16 @@ def do_everything(domain): ##print "---> Check_subdomains from wolframalpha" ##find_subdomains_from_wolfram(domain) - + #domain pagelinks - links=pagelinks(domain) + links=pagelinks(domain) if len(links) >= 1: for x in links: print x dict_to_apend['pagelinks'] = links - + #calling and printing subdomains after pagelinks. 
subdomains_from_netcraft(domain) @@ -267,7 +268,7 @@ def do_everything(domain): for sub in subdomain_list: print sub dict_to_apend['subdomains'] = subdomain_list - + #wikileaks leaklinks=wikileaks(domain) for tl,lnk in leaklinks.items(): @@ -275,8 +276,8 @@ def do_everything(domain): if len(leaklinks.keys()) >= 1: dict_to_apend['wikileaks'] = leaklinks print "For all results, visit: "+ 'https://search.wikileaks.org/?query=&exact_phrase=%s&include_external_sources=True&order_by=newest_document_date'%(domain) - - + + links_brd =boardsearch_forumsearch(domain) for tl,lnk in links_brd.items(): @@ -334,8 +335,10 @@ def do_everything(domain): -def main(): +def main(): signal.signal(signal.SIGINT, signal_handler) + sys.stdout = codecs.getwriter('utf8')(sys.stdout) + sys.stderr = codecs.getwriter('utf8')(sys.stderr) options, args = parser.parse_args() printart() domain = options.domain diff --git a/domain_GooglePDF.py b/domain_GooglePDF.py index acb63a3d..3bc3b585 100755 --- a/domain_GooglePDF.py +++ b/domain_GooglePDF.py @@ -1,41 +1,42 @@ #!/usr/bin/env python -from bs4 import BeautifulSoup -import sys -import urllib2 -import re -import string +from bs4 import BeautifulSoup +import sys +import urllib2 +import re +import string ''' -This code is a bit messed up. Lists files from first page only. Needs a lot of modification. +This code is a bit messed up. Lists files from first page only. Needs a lot of modification. ''' def googlesearch(query, ext): print query - google="https://www.google.co.in/search?filter=0&q=site:" + google="https://www.google.co.in/search?filter=0&q=site:" getrequrl="https://www.google.co.in/search?filter=0&num=100&q=%s&start=" % (query) - hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', - 'Accept-Encoding': 'none', - 'Accept-Language': 'en-US,en;q=0.8', - 'Connection': 'keep-alive'} - req=urllib2.Request(getrequrl, headers=hdr) - response=urllib2.urlopen(req) - data = response.read() - data=re.sub('','',data) - for e in ('>','=','<','\\','(',')','"','http',':','//'): - data = string.replace(data,e,' ') + hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', + 'Accept-Encoding': 'none', + 'Accept-Language': 'en-US,en;q=0.8', + 'Connection': 'keep-alive'} + req=urllib2.Request(getrequrl, headers=hdr) + response=urllib2.urlopen(req) + encoding = response.headers.getparam('charset') + data = response.read().decode(encoding) + data=re.sub('','',data) + for e in ('>','=','<','\\','(',')','"','http',':','//'): + data = string.replace(data,e,' ') - r1 = re.compile('[-_.a-zA-Z0-9.-_]*'+'\.'+ ext) - res = r1.findall(data) + r1 = re.compile('[-_.a-zA-Z0-9.-_]*'+'\.'+ ext) + res = r1.findall(data) if res==[]: print "No results were found" else: return res -domain=sys.argv[1] +domain=sys.argv[1] print "\t\t\t[+] PDF Files\n" list_ext = ["pdf", "xls", "docx"] diff --git a/domain_censys.py b/domain_censys.py index cf15fc5d..a1436b7d 100755 --- a/domain_censys.py +++ b/domain_censys.py @@ -9,6 +9,7 @@ def censys_search(domain): + global censys_list pages = float('inf') page = 1 @@ -27,13 +28,13 @@ def censys_search(domain): proto = r["protocols"] proto = [p.split("/")[0] for p 
in proto] proto.sort(key=float) - protoList = ','.join(map(str, proto)) + protoList = ','.join(map(str, proto)) temp_dict["ip"] = ip - temp_dict["protocols"] = protoList - + temp_dict["protocols"] = protoList + #print '[%s] IP: %s - aaProtocols: %s' % (colored('*', 'red'), ip, protoList) - + if '80' in protoList: new_dict = view(ip, temp_dict) censys_list.append(new_dict) @@ -49,7 +50,7 @@ def censys_search(domain): def view(server, temp_dict): res = requests.get("https://www.censys.io/api/v1/view/ipv4/%s" % (server), auth = (cfg.censysio_id, cfg.censysio_secret)) - payload = res.json() + payload = res.json() try: if 'title' in payload['80']['http']['get'].keys(): @@ -58,7 +59,7 @@ def view(server, temp_dict): temp_dict['title'] = title if 'server' in payload['80']['http']['get']['headers'].keys(): header = "[+] Server: %s" % payload['80']['http']['get']['headers']['server'] - temp_dict["server_header"] = payload['80']['http']['get']['headers']['server'] + temp_dict["server_header"] = payload['80']['http']['get']['headers']['server'] return temp_dict except Exception as error: @@ -70,6 +71,7 @@ def view(server, temp_dict): def main(): domain = sys.argv[1] + print "Censys starting at: " + domain censys_search(domain) for x in censys_list: print x diff --git a/domain_forumsearch.py b/domain_forumsearch.py index dbd5931b..c19cbefd 100755 --- a/domain_forumsearch.py +++ b/domain_forumsearch.py @@ -16,7 +16,7 @@ def boardsearch_forumsearch(domain): print colored(style.BOLD + '\n---> Gathering links from Forums:\n' + style.END, 'blue') time.sleep(0.3) req = requests.get('http://boardreader.com/index.php?a=l&q=%s&d=0&extended_search=1&q1=%s<ype=all&p=50'%(domain,domain)) - soup=BeautifulSoup(req.content, "lxml") + soup=BeautifulSoup(req.content.encode('utf-8'), "lxml") text=soup.findAll('bdo',{"dir":"ltr"}) links={} for lk in text: diff --git a/domain_pastes.py b/domain_pastes.py index ef1f5ae5..75dde615 100755 --- a/domain_pastes.py +++ b/domain_pastes.py @@ -40,7 +40,8 @@ def google_search(domain,start_index): print "Title: %s\nURL: %s\nSnippet: %s\n" % (x['title'], colorize(x['link']), colorize(x['snippet'])) start_index = +1 return int(results['searchInformation']['totalResults']) - elif results['searchInformation']['totalResults'] == "0": + #searchInformation isn't guaranteed to be in results + elif 'searchInformation' in results and results['searchInformation']['totalResults'] == "0": print '0 Results found' return 0 elif results['error']['code'] == 403: @@ -49,7 +50,7 @@ def google_search(domain,start_index): else: return 0 #return json.loads(res.text) - + def main(): domain = sys.argv[1] @@ -60,7 +61,7 @@ def main(): more_iters = (total_results / 10) if more_iters >= 10: print colored(style.BOLD + '\n---> Too many results, Daily API limit might exceed\n' + style.END, 'red') - for x in xrange(1,more_iters + 1): + for x in xrange(1,more_iters + 1): google_search(domain, (x*10)+1) print "\n\n-----------------------------\n" else: diff --git a/domain_subdomains.py b/domain_subdomains.py index 6283d72f..cd08092b 100755 --- a/domain_subdomains.py +++ b/domain_subdomains.py @@ -2,7 +2,7 @@ import sys import json -import requests +import requests from bs4 import BeautifulSoup import re from domain_pagelinks import pagelinks @@ -38,7 +38,7 @@ def subdomains(domain): headers['Referer'] = "https://dnsdumpster.com/" req = requests.post("https://dnsdumpster.com/", data = data, cookies = cookies, headers = headers) #print req.content - soup = BeautifulSoup(req.content, 'lxml') + soup = 
BeautifulSoup(req.content.encode('utf-8'), 'lxml') subdomains=soup.findAll('td',{"class":"col-md-4"}) for subd in subdomains: @@ -75,7 +75,7 @@ def find_subdomains_from_wolfram(domain): if recalculate != "": recalc_code = json.loads(req1.content)['queryresult']['recalculate'].split("=")[1].split("&")[0] - + #third request to get calc_id #print "http://www.wolframalpha.com/input/json.jsp?action=recalc&format=image,plaintext,imagemap,minput,moutput&id=%s&output=JSON&output=JSON&scantimeout=10&statemethod=deploybutton&storesubpodexprs=true" % (recalc_code) req2 = requests.get("http://www.wolframalpha.com/input/json.jsp?action=recalc&format=image,plaintext,imagemap,minput,moutput&id=%s&output=JSON&output=JSON&scantimeout=10&statemethod=deploybutton&storesubpodexprs=true" % (recalc_code), headers=headers, proxies=proxies) @@ -83,7 +83,7 @@ def find_subdomains_from_wolfram(domain): for x in pods: if "Web statistics for" in x['title']: async_code = x['async'].split('=')[1] - + #fourth request to get id for subdomains. req3 = requests.get("http://www.wolframalpha.com/input/json.jsp?action=asyncPod&format=image,plaintext,imagemap,minput,moutput&formattimeout=20&id=%s&output=JSON&podtimeout=20&statemethod=deploybutton&storesubpodexprs=true" % (async_code), headers=headers, proxies=proxies) for x in json.loads(req3.content)['pods'][0]['deploybuttonstates']: @@ -92,7 +92,7 @@ def find_subdomains_from_wolfram(domain): sub_code = x['input'] else: pass - + #fifth request to find few subdomains url = "http://www.wolframalpha.com/input/json.jsp?async=false&dbid=%s&format=image,plaintext,imagemap,sound,minput,moutput&includepodid=WebSiteStatisticsPod:InternetData&input=%s&output=JSON&podTitle=Web+statistics+for+all+of+%s&podstate=%s&s=%s&statemethod=deploybutton&storesubpodexprs=true&text=Subdomains" % (sub_code, domain, domain, sub_code, server_value) req4 = requests.get(url, headers = headers, proxies = proxies) @@ -109,9 +109,9 @@ def find_subdomains_from_wolfram(domain): else: more_code = "blank_bro" - #wooh, final request bitch. + #wooh, final request bitch. 
url = "http://www.wolframalpha.com/input/json.jsp?async=false&dbid=%s&format=image,plaintext,imagemap,sound,minput,moutput&includepodid=WebSiteStatisticsPod:InternetData&input=%s&output=JSON&podTitile=Subdomains&podstate=%s&s=%s&statemethod=deploybutton&storesubpodexprs=true&text=More" % (more_code, domain, more_code, servervalue_for_more) - req5 = requests.get(url, headers = headers, proxies = proxies) + req5 = requests.get(url, headers = headers, proxies = proxies) for x in json.loads(req5.content)['queryresult']['subpods']: if x['title'] == "Subdomains": temp_subdomain_list = x['plaintext'].split("\n") @@ -141,7 +141,7 @@ def subdomains_from_netcraft(domain): link_regx = re.compile('') links_list = link_regx.findall(req1.content) for x in links_list: - dom_name = x.split("/")[2].split(".") + dom_name = x.split("/")[2].split(".") if (dom_name[len(dom_name) - 1] == target_dom_name[1]) and (dom_name[len(dom_name) - 2] == target_dom_name[0]): check_and_append_subdomains(x.split("/")[2]) num_regex = re.compile('Found (.*) site') @@ -162,7 +162,7 @@ def subdomains_from_netcraft(domain): link_regx = re.compile('') links_list = link_regx.findall(req2.content) for y in links_list: - dom_name1 = y.split("/")[2].split(".") + dom_name1 = y.split("/")[2].split(".") if (dom_name1[len(dom_name1) - 1] == target_dom_name[1]) and (dom_name1[len(dom_name1) - 2] == target_dom_name[0]): check_and_append_subdomains(y.split("/")[2]) last_item = links_list[len(links_list) - 1].split("/")[2] diff --git a/domain_zoomeye.py b/domain_zoomeye.py index 1ed8d0f2..cf2220d5 100755 --- a/domain_zoomeye.py +++ b/domain_zoomeye.py @@ -27,8 +27,8 @@ def search_zoomeye(domain): zoomeye_token = get_accesstoken_zoomeye(domain) authData = {"Authorization": "JWT " + str(zoomeye_token)} req = requests.get('http://api.zoomeye.org/web/search/?query=site:%s&page=1' % domain, headers=authData) - return req.text - + return req.text.encode('utf-8') + def main(): domain = sys.argv[1] diff --git a/emailOsint.py b/emailOsint.py index 40b86d7a..860310dd 100755 --- a/emailOsint.py +++ b/emailOsint.py @@ -45,15 +45,15 @@ def haveIbeenpwned(email): return json.loads(req.content) else: return {} - + def gravatar(email): - gravatar_url = "http://www.gravatar.com/avatar/" + hashlib.md5(email.lower()).hexdigest() + gravatar_url = "http://www.gravatar.com/avatar/" + hashlib.md5(email.lower()).hexdigest() return gravatar_url def emaildom(email): req = requests.get('http://www.whoismind.com/email/%s.html'%(email)) - soup=BeautifulSoup(req.content, "lxml") + soup=BeautifulSoup(req.content.encode('utf-8'), "lxml") atag=soup.findAll('a') domains=[] for at in atag: @@ -91,9 +91,9 @@ def list_down_usernames(): print x print "\n" - + def print_emailosint(email): - + ''' hbp = haveIbeenpwned(email) if len(hbp) != 0: @@ -128,7 +128,7 @@ def print_emailosint(email): print '\nChat Accounts' for x in data.get("contactInfo","").get('chats', ''): print "\t%s on %s" % (x.get('handle', ''), x.get('client', '')) - + print colored(style.BOLD + '\n Social Profiles\n' + style.END, 'green') for x in data.get("socialProfiles",""): head = "\t%s:" % x.get('type','').upper() @@ -177,7 +177,7 @@ def print_emailosint(email): print doms ''' - + print colored(style.BOLD + '\n---> Finding Paste(s)..\n' + style.END, 'blue') if cfg.google_cse_key != "" and cfg.google_cse_key != "XYZ" and cfg.google_cse_cx != "" and cfg.google_cse_cx != "XYZ": total_results = google_search(email, 1) @@ -185,7 +185,7 @@ def print_emailosint(email): more_iters = (total_results / 10) if more_iters 
>= 10: print colored(style.BOLD + '\n---> Too many results, Daily API limit might exceed\n' + style.END, 'red') - for x in xrange(1,more_iters + 1): + for x in xrange(1,more_iters + 1): google_search(email, (x*10)+1) print "\n\n-----------------------------\n" else: @@ -202,7 +202,7 @@ def print_emailosint(email): else: print colored('[-] No Associated Slides found.', 'red') - + scdlinks=emailscribddocs(email) if len(scdlinks) != 0: print colored(style.BOLD + '\n---> Associated SCRIBD documents:\n' + style.END, 'blue') @@ -221,7 +221,7 @@ def print_emailosint(email): def main(): print_emailosint(email) list_down_usernames() - + if __name__ == "__main__": main() diff --git a/usernameOsint.py b/usernameOsint.py index c37a2cd2..08574a00 100755 --- a/usernameOsint.py +++ b/usernameOsint.py @@ -6,6 +6,7 @@ import requests import sys +import codecs import config as cfg import clearbit import json @@ -49,8 +50,8 @@ def extracting(prourl,tag,attribute,value,finattrib,profile): urllib.urlretrieve(img[finattrib], path) def profilepic(urls): - - + + if len(urls) or git_data['avatar_url']: if not os.path.exists(username): os.makedirs(username) @@ -79,7 +80,7 @@ def profilepic(urls): extracting(url,tg,att,val,valx,pro) continue except KeyError: - pass + pass elif 'youtube' in url: try: tg='link' @@ -90,7 +91,7 @@ def profilepic(urls): extracting(url,tg,att,val,valx,pro) continue except KeyError: - pass + pass elif 'twitter' in url: try: tg='img' @@ -236,17 +237,17 @@ def twitterdetails(username): #preparing auth api = tweepy.API(auth) - - f = open("temptweets.txt","w+") + + f = codecs.open("temptweets.txt","w+", encoding='utf-8') #writing tweets to temp file- last 1000 for tweet in tweepy.Cursor(api.user_timeline, id=username).items(1000): f.write(tweet.text.encode("utf-8")) f.write("\n") - + #extracting hashtags - f = open('temptweets.txt', 'r') + f = codecs.open('temptweets.txt', 'r', encoding='utf-8') q=f.read() strings = re.findall(r'(?:\#+[\w_]+[\w\'_\-]*[\w_]+)', q) #Regex(s) Source: https://marcobonzanini.com/2015/03/09/mining-twitter-data-with-python-part-2/ #extracting users @@ -259,15 +260,15 @@ def twitterdetails(username): item=item.strip( '#' ) item=item.lower() hashlist.append(item) - + hashlist=hashlist[:10] for itm in tusers: itm=itm.strip( '@' ) itm=itm.lower() userlist.append(itm) - + userlist=userlist[:10] - + return hashlist,userlist username = sys.argv[1] @@ -316,8 +317,8 @@ def twitterdetails(username): print "Top Hashtag Occurrence for user "+username+" based on last 1000 tweets" for hash,cnt in count: print "#"+hash+" : "+str(cnt) - print "\n" - + print "\n" + #counting user occurrence countu= Counter(userlist).most_common() print "Top User Occurrence for user "+username+" based on last 1000 tweets" diff --git a/username_gitscrape.py b/username_gitscrape.py index 02b626f5..9b6acb92 100644 --- a/username_gitscrape.py +++ b/username_gitscrape.py @@ -22,7 +22,7 @@ def find_repos(username): list_repos = [] url = "https://api.github.com/users/%s/repos?access_token=%s" % (username, access_token) req = requests.get(url) - if 'API rate limit exceeded' not in req.text: + if 'API rate limit exceeded' not in req.text.encode('utf-8'): for repos in json.loads(req.content): repos['full_name'] if repos['fork'] == False: From 4be715a5c5011a9c74fef7e3b8d1fd31b58cd5c4 Mon Sep 17 00:00:00 2001 From: Administrator Date: Sat, 11 Mar 2017 20:39:04 -0800 Subject: [PATCH 2/4] Added check for empty list before attempting to access it --- .idea/vcs.xml | 6 ++++++ domain_subdomains.py | 4 ++-- 2 files 
changed, 8 insertions(+), 2 deletions(-) create mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/domain_subdomains.py b/domain_subdomains.py index cd08092b..638a1d56 100755 --- a/domain_subdomains.py +++ b/domain_subdomains.py @@ -146,10 +146,10 @@ def subdomains_from_netcraft(domain): check_and_append_subdomains(x.split("/")[2]) num_regex = re.compile('Found (.*) site') num_subdomains = num_regex.findall(req1.content) - if num_subdomains == []: + if num_subdomains : num_regex = re.compile('First (.*) sites returned') num_subdomains = num_regex.findall(req1.content) - if num_subdomains[0] != str(0): + if num_subdomains and num_subdomains[0] != str(0): num_pages = int(num_subdomains[0])/20+1 if num_pages > 1: last_regex = re.compile('%s.\n' % (20)) From 08e95ee3db78298e149d59c2565726c9a44175c9 Mon Sep 17 00:00:00 2001 From: Administrator Date: Sat, 11 Mar 2017 20:39:38 -0800 Subject: [PATCH 3/4] Added check for empty list before attempting to access it --- .idea/vcs.xml | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .idea/vcs.xml diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7f..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From bf5ce28ec13c5e02a7c8f2061fdcb61dc9bc97d3 Mon Sep 17 00:00:00 2001 From: Paul Ganea Date: Sat, 11 Mar 2017 20:44:44 -0800 Subject: [PATCH 4/4] Added Dockerimages for faster deployment cycles and general use --- .gitignore | 3 ++ Dockerimages/Dockerfile | 36 +++++++++++++++++++++++ Dockerimages/Dockerfile.dev | 30 +++++++++++++++++++ Dockerimages/README.md | 48 +++++++++++++++++++++++++++++++ Dockerimages/service/celery/run | 6 ++++ Dockerimages/service/mongod/run | 1 + Dockerimages/service/rabbitmq/run | 2 ++ Dockerimages/service/server/run | 2 ++ 8 files changed, 128 insertions(+) create mode 100644 Dockerimages/Dockerfile create mode 100644 Dockerimages/Dockerfile.dev create mode 100644 Dockerimages/README.md create mode 100755 Dockerimages/service/celery/run create mode 100644 Dockerimages/service/mongod/run create mode 100755 Dockerimages/service/rabbitmq/run create mode 100755 Dockerimages/service/server/run diff --git a/.gitignore b/.gitignore index 3706297d..385fb59c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,13 @@ __pycache__/ config.py /config.py +Dockerimages/config.py +Dockerimages/src/* facebook_user_details.py generate_passwords.py git_searcher.py + instaUsernameOsint.py ip_to_neighboursites.py test.py diff --git a/Dockerimages/Dockerfile b/Dockerimages/Dockerfile new file mode 100644 index 00000000..5438bf05 --- /dev/null +++ b/Dockerimages/Dockerfile @@ -0,0 +1,36 @@ +FROM debian:jessie-slim +MAINTAINER Paul Ganea +ENV C_FORCE_ROOT=root +RUN apt-get update -y && apt-get install -y build-essential \ + curl \ + git \ + gnupg \ + libxml2-dev \ + libxmlsec1-dev \ + python \ + python-dev \ + runit \ + unzip \ + vim \ + wget \ + zip \ + && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A14518585931BC711F9BA15703C6 \ + && echo "deb http://repo.mongodb.org/apt/debian jessie/mongodb-org/3.4 main" | tee /etc/apt/sources.list.d/mongodb-org-3.4.list \ + && wget -O- https://www.rabbitmq.com/rabbitmq-release-signing-key.asc | apt-key add - \ + && echo 'deb http://www.rabbitmq.com/debian/ testing main' || tee /etc/apt/sources.list.d/rabbitmq.list && apt-get update -y 
\ + && apt-get install -y mongodb-org rabbitmq-server \ + && curl https://bootstrap.pypa.io/get-pip.py | python \ + && apt-get install -y libxml2-dev python-dev libxmlsec1-dev\ + && git clone https://github.com/DataSploit/datasploit \ + && cd datasploit \ + && mkdir /datasploit/datasploitDb \ + && pip install -r requirements.txt \ + && useradd -ms /bin/bash k4ch0w \ + && apt-get remove -y libxml2-dev python-dev libxmlsec1-dev curl wget build-essential \ + && rm -rf /var/lib/apt/lists/* + +USER k4ch0w +WORKDIR /datasploit +COPY service /etc/service/ +COPY config.py /datasploit/config.py +ENTRYPOINT ["runsvdir", "/etc/service"] diff --git a/Dockerimages/Dockerfile.dev b/Dockerimages/Dockerfile.dev new file mode 100644 index 00000000..577fd9df --- /dev/null +++ b/Dockerimages/Dockerfile.dev @@ -0,0 +1,30 @@ +FROM debian:jessie-slim +MAINTAINER Paul Ganea +ENV C_FORCE_ROOT=root +RUN apt-get update -y && apt-get install -y build-essential \ + curl \ + git \ + gnupg \ + libxml2-dev \ + libxmlsec1-dev \ + python \ + python-dev \ + runit \ + unzip \ + vim \ + wget \ + zip \ + && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 0C49F3730359A14518585931BC711F9BA15703C6 \ + && echo "deb http://repo.mongodb.org/apt/debian jessie/mongodb-org/3.4 main" | tee /etc/apt/sources.list.d/mongodb-org-3.4.list \ + && wget -O- https://www.rabbitmq.com/rabbitmq-release-signing-key.asc | apt-key add - \ + && echo 'deb http://www.rabbitmq.com/debian/ testing main' || tee /etc/apt/sources.list.d/rabbitmq.list && apt-get update -y \ + && apt-get install -y mongodb-org rabbitmq-server \ + && curl https://bootstrap.pypa.io/get-pip.py | python +COPY src/ /datasploit/ +WORKDIR /datasploit +RUN cd /datasploit && mkdir /datasploit/datasploitDb \ + && pip install -r requirements.txt +COPY service /etc/service/ +#Provide your own config.py! Do not commit it to your forked repo! +COPY config.py /datasploit/config.py +ENTRYPOINT ["runsvdir", "/etc/service"] diff --git a/Dockerimages/README.md b/Dockerimages/README.md new file mode 100644 index 00000000..98a01709 --- /dev/null +++ b/Dockerimages/README.md @@ -0,0 +1,48 @@ +# Datasploit dockerimages + +### You will need to provide config.py file for both these images! + +### Grab Coffee :coffee: on build it takes about ~5 minutes + +## Development image: +### This image allows you test source code changes +```bash + +$ cp $EDITED_SOURCE_CODE_DIR src/ +$ docker build -t="dasploit/datasploit" -f Dockerfile.dev . +$ docker run -d --name="datasploit" datasploit/datasploit +$ docker exec -it datasploit bash +root@61e05f5b7776:/datasploit# + +``` + + +## Working docker image +### This image clones from the master branch + +```bash +$ docker build -t="datasploit/datasploit" -f Dockerfile . +$ docker run -d --name="datasploit" datsploit/datasploit +$ docker exec -it datasploit bash +k4ch0w@5722c53edc24:/datasploit$ python domainOsint.py + + ____/ /____ _ / /_ ____ _ _____ ____ / /____ (_)/ /_ + / __ // __ `// __// __ `// ___// __ \ / // __ \ / // __/ + / /_/ // /_/ // /_ / /_/ /(__ )/ /_/ // // /_/ // // /_ + \__,_/ \__,_/ \__/ \__,_//____// .___//_/ \____//_/ \__/ + /_/ + + Open Source Assistant for #OSINT + website: www.datasploit.info + +[-] Invalid argument passed. +Usage: domainOsint.py [options] + +Options: + -h, --help show this help message and exit + -d DOMAIN, --domain=DOMAIN Domain name against which automated Osint is to be performed. 
+k4ch0w@5722c53edc24:/datasploit$ +``` + + + diff --git a/Dockerimages/service/celery/run b/Dockerimages/service/celery/run new file mode 100755 index 00000000..be5f8ada --- /dev/null +++ b/Dockerimages/service/celery/run @@ -0,0 +1,6 @@ +#!/bin/bash + +C_FORCE_ROOT=root +cd /datasploit/core +celery -A core worker -l info --concurrency 20 + diff --git a/Dockerimages/service/mongod/run b/Dockerimages/service/mongod/run new file mode 100644 index 00000000..d6c14b43 --- /dev/null +++ b/Dockerimages/service/mongod/run @@ -0,0 +1 @@ +mongod --dbpath /datasploit/datasploitDb diff --git a/Dockerimages/service/rabbitmq/run b/Dockerimages/service/rabbitmq/run new file mode 100755 index 00000000..fc08e70b --- /dev/null +++ b/Dockerimages/service/rabbitmq/run @@ -0,0 +1,2 @@ +#!/bin/sh +rabbitmq-server diff --git a/Dockerimages/service/server/run b/Dockerimages/service/server/run new file mode 100755 index 00000000..3ba3b314 --- /dev/null +++ b/Dockerimages/service/server/run @@ -0,0 +1,2 @@ +#!/bin/sh +python /datasploit/core/manage.py runserver 0.0.0.0:8000
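
Note on the encoding changes in PATCH 1/4: the patch applies the same Python 2 Unicode pattern in several places — open text files through codecs, decode HTTP responses using the charset the server reports, and wrap sys.stdout so printing non-ASCII text does not raise UnicodeEncodeError when output is piped. The short sketch below illustrates that pattern in isolation; it is not part of the patches above, and the wordlist name, the URL, and the 'utf-8' fallback are assumptions made for this example only.

# Illustrative Python 2 sketch of the UTF-8 handling used in PATCH 1/4.
# The file name, URL and 'utf-8' fallback are assumptions for this example.
import codecs
import sys
import urllib2

# Wrap stdout so printing unicode text works even when output is redirected
# (domainOsint.py does this in main()).
sys.stdout = codecs.getwriter('utf8')(sys.stdout)

# Read a text file as unicode instead of raw bytes
# (active_default_file_check.py switches open() to codecs.open()).
handle = codecs.open('check_urls.txt', encoding='utf-8')  # hypothetical file
urls = [line.strip() for line in handle]
handle.close()

# Decode an HTTP response using the charset the server declares
# (domain_GooglePDF.py reads it via response.headers.getparam('charset')).
response = urllib2.urlopen('http://example.com/')  # placeholder URL
charset = response.headers.getparam('charset') or 'utf-8'  # fallback is an assumption
body = response.read().decode(charset)

print body[:80]

The decode direction shown here matches the standalone domain_GooglePDF.py in the patch; the copy under core/osint/ instead calls .encode() on the raw bytes read from the response.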