From d9dd162f00ada22c28bb6b4618ddde3c1d97594f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Breuer?= Date: Tue, 16 May 2023 08:51:32 +0200 Subject: [PATCH 1/5] Remove anyrun, since cfscrape library needs to be fixed. With the latest urllib3 version we get: "ImportError: cannot import name 'DEFAULT_CIPHERS' from 'urllib3.util.ssl_'" --- munin.py | 51 +++++++-------------------------------------------- 1 file changed, 7 insertions(+), 44 deletions(-) diff --git a/munin.py b/munin.py index c5dad7d..84e4d74 100755 --- a/munin.py +++ b/munin.py @@ -40,7 +40,6 @@ from lib.munin_csv import writeCSVHeader, writeCSV, CSV_FIELDS import lib.connections as connections from lib.munin_stdout import printResult, printHighlighted, printKeyLine -import cfscrape # Handle modules that may be difficult to install # e.g. pymisp has no Debian package, selenium is obsolete @@ -208,9 +207,6 @@ def processLine(line, debug): # URLhaus uh_info = getURLhaus(info['md5'], info['sha256']) info.update(uh_info) - # AnyRun - #ar_info = getAnyRun(info['sha256']) - #info.update(ar_info) # CAPE ca_info = getCAPE(info['md5'], info['sha1'], info['sha256']) info.update(ca_info) @@ -438,13 +434,13 @@ def getIntezerInfo(sha256): response.raise_for_status() session = requests.session() session.headers['Authorization'] = session.headers['Authorization'] = 'Bearer %s' % response.json()['result'] - + response = session.get(INTEZER_URL + '/files/{}'.format(hash)) if response.status_code == 404 or response.status_code == 410: return info else: info['intezer_available'] = True - + response.raise_for_status() report = response.json() if args.debug: @@ -850,39 +846,6 @@ def getCAPE(md5, sha1, sha256): return info -def getAnyRun(sha256): - """ - Retrieves information from AnyRun Service - :param sha256: hash value - :return info: info object - """ - info = {'anyrun_available': False} - if sha256 == "-": - return info - try: - - if args.debug: - print("[D] Querying Anyrun") - cfscraper = 
cfscrape.create_scraper() - response = cfscraper.get(URL_ANYRUN % sha256, proxies=connections.PROXY) - - - if args.debug: - print("[D] Anyrun Response Code: %s" %response.status_code) - - if response.status_code == 200: - info['anyrun_available'] = True - except ConnectionError as e: - print("Error while accessing AnyRun: connection failed") - if args.debug: - traceback.print_exc() - except Exception as e: - print("Error while accessing AnyRun") - if args.debug: - traceback.print_exc() - return info - - def getVirusBayInfo(hash): """ Retrieves information from VirusBay https://beta.virusbay.io/ @@ -1250,7 +1213,7 @@ def showVTquota(): parser.add_argument('--cli', action='store_true', help='Run Munin in command line interface mode', default=False) parser.add_argument('--rescan', action='store_true', help='Trigger a rescan of each analyzed file', default=False) parser.add_argument('--debug', action='store_true', default=False, help='Debug output') - + args = parser.parse_args() @@ -1434,7 +1397,7 @@ def showVTquota(): except Exception as e: traceback.print_exc() - # Query Valhalla for hashes matching the search word + # Query Valhalla for hashes matching the search word if args.vh: if not VALHALLA_API_KEY or VALHALLA_API_KEY == "-": print("[E] Cannot query Valhalla without API Key") @@ -1473,14 +1436,14 @@ def showVTquota(): print("Problems with converting timestamp %s" % timestamp_str) if VH_RULE_CUTOFF: - # skip sample if - # - we already have it in cache or + # skip sample if + # - we already have it in cache or # - it's too old for --vhmaxage # - enough samples from this rule if inCache(hashh) or \ now - vhmaxage > timestamp_hash or \ ( - rule_count[rulename] and + rule_count[rulename] and len(rule_count) / rule_count[rulename] > VH_RULE_CUTOFF and # only skip after having 10+ samples of this rule to avoid problems on a fresh vt-hash-db.json rule_count[rulename] > 10 From dd9b8247e957e40d8fd1f7a4aeddb74953b29dfd Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andr=C3=A9=20Breuer?= Date: Tue, 16 May 2023 08:59:30 +0200 Subject: [PATCH 2/5] Annotate that MD5 and SHA1 are not used for security, c.f. CWE-327. --- munin.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/munin.py b/munin.py index 84e4d74..4f85051 100755 --- a/munin.py +++ b/munin.py @@ -1118,8 +1118,8 @@ def generateHashes(fileData): """ hashes = {'md5': '', 'sha1': '', 'sha256': ''} try: - md5 = hashlib.md5() - sha1 = hashlib.sha1() + md5 = hashlib.md5(usedforsecurity=False) + sha1 = hashlib.sha1(usedforsecurity=False) sha256 = hashlib.sha256() md5.update(fileData) sha1.update(fileData) From b13924e8644367169608aec6cfee26b38ac8d910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Breuer?= Date: Tue, 16 May 2023 09:05:12 +0200 Subject: [PATCH 3/5] Add timeout in requests to avoid hanging indefinitely. --- munin.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/munin.py b/munin.py index 4f85051..f893088 100755 --- a/munin.py +++ b/munin.py @@ -345,10 +345,11 @@ def getMalShareInfo(hash): return info try: #print("Malshare URL: %s" % (MAL_SHARE_API % (MAL_SHARE_API_KEY, hash))) - response_query = requests.get(MAL_SHARE_API % (MAL_SHARE_API_KEY, hash), - timeout=15, - proxies=connections.PROXY, - headers=FAKE_HEADERS) + response_query = requests.get( + MAL_SHARE_API % (MAL_SHARE_API_KEY, hash), + timeout=15, + proxies=connections.PROXY, + headers=FAKE_HEADERS) if args.debug: print("[D] Querying Malshare: %s" % response_query.request.url) #print(response_query.content) @@ -640,7 +641,10 @@ def getValhalla(sha256): "sha256": sha256, "apikey": VALHALLA_API_KEY, } - response = requests.post(VALHALLA_URL, data=data, proxies=connections.PROXY) + response = requests.post(VALHALLA_URL, + data=data, + proxies=connections.PROXY, + timeout=15) if args.debug: print("[D] VALHALLA Response: '%s'" % response.json()) res = response.json() @@ -673,7 +677,11 @@ def 
downloadHybridAnalysisSample(hash): # Querying Hybrid Analysis if args.debug: print("[D] Requesting download of sample: %s" % preparedURL) - response = requests.get(preparedURL, params={'environmentId':'100'}, headers=headers, proxies=connections.PROXY) + response = requests.get(preparedURL, + params={'environmentId':'100'}, + headers=headers, + proxies=connections.PROXY, + timeout=15) # If the response is a json file if response.headers["Content-Type"] == "application/json": @@ -763,7 +771,9 @@ def getTotalHashInfo(sha1): # Querying Hybrid Analysis if args.debug: print("[D] Querying Totalhash: %s" % preparedURL) - response = requests.get(preparedURL, proxies=connections.PROXY) + response = requests.get(preparedURL, + proxies=connections.PROXY, + timeout=15) # print "Response: '%s'" % response.content if response.content and \ '0 of 0 results' not in response.content and \ @@ -860,7 +870,9 @@ def getVirusBayInfo(hash): preparedURL = "%s%s" % (VIRUSBAY_URL, hash) if args.debug: print("[D] Querying Virusbay: %s" % preparedURL) - response = requests.get(preparedURL, proxies=connections.PROXY).json() + response = requests.get(preparedURL, + proxies=connections.PROXY, + timeout=15).json() # If response has the correct content info['virusbay_available'] = False #print(response) From 569f684c4d2e9fefbc79fbed3c13652bf2d6dac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Breuer?= Date: Tue, 16 May 2023 09:35:33 +0200 Subject: [PATCH 4/5] Add some compliance information. 
--- munin.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/munin.py b/munin.py index f893088..7c7e3e2 100755 --- a/munin.py +++ b/munin.py @@ -1,7 +1,9 @@ #!/usr/bin/env python3 +# coding: utf-8 __AUTHOR__ = 'Florian Roth' __VERSION__ = "0.22.0 January 2023" +__LICENSE__ = "Apache-2.0" """ Install dependencies with: From 2022c505dfde75670ec20f234152ec488bce2734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Breuer?= Date: Tue, 16 May 2023 09:36:49 +0200 Subject: [PATCH 5/5] Fix broken CSV output due to delimiter characters in some fields. --- lib/munin_csv.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/munin_csv.py b/lib/munin_csv.py index 14b6bf0..293bf9f 100644 --- a/lib/munin_csv.py +++ b/lib/munin_csv.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 +# coding: utf-8 + import codecs import traceback @@ -54,24 +57,24 @@ def writeCSV(info, resultFile): """ try: with codecs.open(resultFile, 'a', encoding='utf8') as fh_results: + fields = [] # Print every field from the field list to the output file for field_pretty in CSV_FIELD_ORDER: field = CSV_FIELDS[field_pretty] - try: - field = info[field] - except KeyError as e: - field = "False" + field = info.get(field, "False") try: field = str(field).replace(r'"', r'\"').replace("\n", " ") except AttributeError as e: traceback.print_exc() - fh_results.write("%s;" % field) + fields.append(field.replace(";", ",")) # Append vendor scan results for vendor in VENDORS: if vendor in info['vendor_results']: - fh_results.write("%s;" % info['vendor_results'][vendor]) + fields.append(info['vendor_results'][vendor].replace(";", ",")) else: - fh_results.write("-;") + fields.append("-") + + fh_results.write(";".join(fields)) fh_results.write('\n') except: traceback.print_exc()