diff --git a/lib/munin_csv.py b/lib/munin_csv.py index 14b6bf0..a7c736d 100644 --- a/lib/munin_csv.py +++ b/lib/munin_csv.py @@ -1,10 +1,13 @@ +#!/usr/bin/env python3 +# coding: utf-8 + import codecs import traceback # only write top10 vendors to CSV because file format can't handle changing number of them VENDORS = ['Microsoft', 'Kaspersky', 'McAfee', 'CrowdStrike', 'TrendMicro', 'ESET-NOD32', 'Symantec', 'F-Secure', 'Sophos', 'GData'] -CSV_FIELD_ORDER = ['Lookup Hash', 'Rating', 'Comment', 'Positives', 'File Size', 'Virus', 'File Names', 'First Submitted', +CSV_FIELD_ORDER = ['Lookup Hash', 'Rating', 'Comment', 'Positives', 'Total Checks', 'File Size', 'Virus', 'File Names', 'First Submitted', 'Last Submitted', 'File Type', 'MD5', 'SHA1', 'SHA256', 'Imphash', 'Matching Rule', 'Harmless', 'Revoked', 'Expired', 'Trusted', 'Signed', 'Signer', 'Hybrid Analysis Sample', 'MalShare Sample', 'VirusBay Sample', 'MISP', 'MISP Events', 'URLhaus', 'AnyRun', 'CAPE', 'VALHALLA', 'User Comments'] @@ -14,6 +17,7 @@ 'Comment': 'comment', 'Matching Rule': 'matching_rule', 'Positives': 'positives', + 'Total Checks': 'total', 'Virus': 'virus', 'File Names': 'filenames', 'First Submitted': 'first_submitted', @@ -54,24 +58,24 @@ def writeCSV(info, resultFile): """ try: with codecs.open(resultFile, 'a', encoding='utf8') as fh_results: + fields = [] # Print every field from the field list to the output file for field_pretty in CSV_FIELD_ORDER: field = CSV_FIELDS[field_pretty] - try: - field = info[field] - except KeyError as e: - field = "False" + field = info.get(field, "False") try: field = str(field).replace(r'"', r'\"').replace("\n", " ") except AttributeError as e: traceback.print_exc() - fh_results.write("%s;" % field) + fields.append(field.replace(";", ",")) # Append vendor scan results for vendor in VENDORS: if vendor in info['vendor_results']: - fh_results.write("%s;" % info['vendor_results'][vendor]) + fields.append(info['vendor_results'][vendor].replace(";", ",")) else: - fh_results.write("-;") + fields.append("-") + + fh_results.write(";".join(fields)) fh_results.write('\n') except: traceback.print_exc() diff --git a/munin.py b/munin.py index c5dad7d..7c7e3e2 100755 --- a/munin.py +++ b/munin.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 +# coding: utf-8 __AUTHOR__ = 'Florian Roth' __VERSION__ = "0.22.0 January 2023" +__LICENSE__ = "Apache-2.0" """ Install dependencies with: @@ -40,7 +42,6 @@ from lib.munin_csv import writeCSVHeader, writeCSV, CSV_FIELDS import lib.connections as connections from lib.munin_stdout import printResult, printHighlighted, printKeyLine -import cfscrape # Handle modules that may be difficult to install # e.g. pymisp has no Debian package, selenium is obsolete @@ -208,9 +209,6 @@ def processLine(line, debug): # URLhaus uh_info = getURLhaus(info['md5'], info['sha256']) info.update(uh_info) - # AnyRun - #ar_info = getAnyRun(info['sha256']) - #info.update(ar_info) # CAPE ca_info = getCAPE(info['md5'], info['sha1'], info['sha256']) info.update(ca_info) @@ -349,10 +347,11 @@ def getMalShareInfo(hash): return info try: #print("Malshare URL: %s" % (MAL_SHARE_API % (MAL_SHARE_API_KEY, hash))) - response_query = requests.get(MAL_SHARE_API % (MAL_SHARE_API_KEY, hash), - timeout=15, - proxies=connections.PROXY, - headers=FAKE_HEADERS) + response_query = requests.get( + MAL_SHARE_API % (MAL_SHARE_API_KEY, hash), + timeout=15, + proxies=connections.PROXY, + headers=FAKE_HEADERS) if args.debug: print("[D] Querying Malshare: %s" % response_query.request.url) #print(response_query.content) @@ -438,13 +437,13 @@ def getIntezerInfo(sha256): response.raise_for_status() session = requests.session() session.headers['Authorization'] = session.headers['Authorization'] = 'Bearer %s' % response.json()['result'] - + response = session.get(INTEZER_URL + '/files/{}'.format(hash)) if response.status_code == 404 or response.status_code == 410: return info else: info['intezer_available'] = True - + response.raise_for_status() report = response.json() if args.debug: @@ -644,7 +643,10 @@ def getValhalla(sha256): "sha256": sha256, "apikey": VALHALLA_API_KEY, } - response = requests.post(VALHALLA_URL, data=data, proxies=connections.PROXY) + response = requests.post(VALHALLA_URL, + data=data, + proxies=connections.PROXY, + timeout=15) if args.debug: print("[D] VALHALLA Response: '%s'" % response.json()) res = response.json() @@ -677,7 +679,11 @@ def downloadHybridAnalysisSample(hash): # Querying Hybrid Analysis if args.debug: print("[D] Requesting download of sample: %s" % preparedURL) - response = requests.get(preparedURL, params={'environmentId':'100'}, headers=headers, proxies=connections.PROXY) + response = requests.get(preparedURL, + params={'environmentId':'100'}, + headers=headers, + proxies=connections.PROXY, + timeout=15) # If the response is a json file if response.headers["Content-Type"] == "application/json": @@ -767,7 +773,9 @@ def getTotalHashInfo(sha1): # Querying Hybrid Analysis if args.debug: print("[D] Querying Totalhash: %s" % preparedURL) - response = requests.get(preparedURL, proxies=connections.PROXY) + response = requests.get(preparedURL, + proxies=connections.PROXY, + timeout=15) # print "Response: '%s'" % response.content if response.content and \ '0 of 0 results' not in response.content and \ @@ -850,39 +858,6 @@ def getCAPE(md5, sha1, sha256): return info -def getAnyRun(sha256): - """ - Retrieves information from AnyRun Service - :param sha256: hash value - :return info: info object - """ - info = {'anyrun_available': False} - if sha256 == "-": - return info - try: - - if args.debug: - print("[D] Querying Anyrun") - cfscraper = cfscrape.create_scraper() - response = cfscraper.get(URL_ANYRUN % sha256, proxies=connections.PROXY) - - - if args.debug: - print("[D] Anyrun Response Code: %s" %response.status_code) - - if response.status_code == 200: - info['anyrun_available'] = True - except ConnectionError as e: - print("Error while accessing AnyRun: connection failed") - if args.debug: - traceback.print_exc() - except Exception as e: - print("Error while accessing AnyRun") - if args.debug: - traceback.print_exc() - return info - - def getVirusBayInfo(hash): """ Retrieves information from VirusBay https://beta.virusbay.io/ @@ -897,7 +872,9 @@ def getVirusBayInfo(hash): preparedURL = "%s%s" % (VIRUSBAY_URL, hash) if args.debug: print("[D] Querying Virusbay: %s" % preparedURL) - response = requests.get(preparedURL, proxies=connections.PROXY).json() + response = requests.get(preparedURL, + proxies=connections.PROXY, + timeout=15).json() # If response has the correct content info['virusbay_available'] = False #print(response) @@ -1155,8 +1132,8 @@ def generateHashes(fileData): """ hashes = {'md5': '', 'sha1': '', 'sha256': ''} try: - md5 = hashlib.md5() - sha1 = hashlib.sha1() + md5 = hashlib.md5(usedforsecurity=False) + sha1 = hashlib.sha1(usedforsecurity=False) sha256 = hashlib.sha256() md5.update(fileData) sha1.update(fileData) @@ -1250,7 +1227,7 @@ def showVTquota(): parser.add_argument('--cli', action='store_true', help='Run Munin in command line interface mode', default=False) parser.add_argument('--rescan', action='store_true', help='Trigger a rescan of each analyzed file', default=False) parser.add_argument('--debug', action='store_true', default=False, help='Debug output') - + args = parser.parse_args() @@ -1434,7 +1411,7 @@ def showVTquota(): except Exception as e: traceback.print_exc() - # Query Valhalla for hashes matching the search word + # Query Valhalla for hashes matching the search word if args.vh: if not VALHALLA_API_KEY or VALHALLA_API_KEY == "-": print("[E] Cannot query Valhalla without API Key") @@ -1473,14 +1450,14 @@ def showVTquota(): print("Problems with converting timestamp %s" % timestamp_str) if VH_RULE_CUTOFF: - # skip sample if - # - we already have it in cache or + # skip sample if + # - we already have it in cache or # - it's too old for --vhmaxage # - enough samples from this rule if inCache(hashh) or \ now - vhmaxage > timestamp_hash or \ ( - rule_count[rulename] and + rule_count[rulename] and len(rule_count) / rule_count[rulename] > VH_RULE_CUTOFF and # only skip after having 10+ samples of this rule to avoid problems on a fresh vt-hash-db.json rule_count[rulename] > 10