diff --git a/scanning/Dockerfile b/scanning/Dockerfile new file mode 100644 index 0000000..072cce7 --- /dev/null +++ b/scanning/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.9-slim-buster + +# install trivy - https://aquasecurity.github.io/trivy/v0.19.0/advanced/container/embed-in-dockerfile/ +COPY --from=aquasec/trivy:latest /usr/local/bin/trivy /usr/local/bin/trivy + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/src/app + +COPY requirements.txt find.py receiver.py scan.py ./ + +RUN pip install --no-cache-dir -r requirements.txt + +CMD [ "/bin/bash", "-c", "python ./scan.py experiment"] \ No newline at end of file diff --git a/scanning/README.md b/scanning/README.md index 5a8a3b7..3e4ab0c 100644 --- a/scanning/README.md +++ b/scanning/README.md @@ -1,23 +1,22 @@ # Image Scanner -Wraps around `docker pull` and scans an image with Clair. Will print the image +Wraps around the trivy image scanner. Will print the image tag and the highest CVE severity level found in the image. +> The scanning component has been migrated from Clair to trivy for easier integration into pipeline or CI/CD use cases. Some functions may not be fully tested with the new scanning engine. + ## Prerequisites -- Install Docker and docker-compose -- Install Klar, erlang and RabbitMQ server with `scanner_setup.sh` +- Install Docker +- Install trivy, options can be found [here](https://aquasecurity.github.io/trivy/latest/getting-started/installation/) +- Install Klar, erlang and RabbitMQ server with `scanner_setup.sh` (optional for the scanner-only use case) - `pip install -r requirements.txt` -## Setup +## Build Scanner Container -Run clair: ``` -cd clair-test -sudo docker-compose up -d +docker build -t /: . ``` -Keep in mind that clair takes a while to populate the CVE database the first time, -and during that time it will return no vulnerabilities. This takes 20-30 minutes. 
## Deploying on Openshift (Experimental) @@ -33,41 +32,46 @@ to select HTTP or RabbitMQ mode, to match the mode the rule engine operates in. ## Usage -The program has 3 modes, 'cluster', 'detail' and 'pull'. +The program has 4 modes, 'cluster', 'detail', 'scan' and 'experiment'. + +Available commands are: + +``` +python3 scan.py scan # run scan mode on single image +python3 scan.py detail # run detailed mode on single image +python3 scan.py experiment # scan all DockerHub library images +python3 scan.py cluster # scan all images found on an OpenShift cluster +``` -### Pull/push mode +### Scan mode -Pull mode will pull the image to the local registry and scan it with clair. +The scanner will scan the given image using trivy. It will print the 'message' that will be sent to the policy engine. -Push mode can be used for a custom image. +This mode can also be used for local images. Example: ``` -$ python3 scan.py pull nginx:latest +$ python3 scan.py scan nginx:latest ... -Image: nginx:latest, Result: 3 +{'image': 'local/executor:latest', 'level': 4, 'avg': 2.3706896551724137} ``` The severity levels for clair are: -- None (0) -- Unknown (1) -- Negligible (2) -- Low (3) -- Medium (4) -- High (5) -- Critical (6) -- Defcon1 (7) +- UNKNOWN (1) +- LOW (2) +- MEDIUM (3) +- HIGH (4) +- CRITICAL (5) -Refer to clair's documentation for a detailed explanation of the severity levels. -The program will return 'None' if no vlunerabilities are found by clair. +Refer to trivy's documentation for a detailed explanation of the severity levels. -This option will submit the result to the rule engine via RabbitMQ. +This option will submit the result to the rule engine via RabbitMQ or HTTP. 
### Detail mode -If the detailed output from Clair is desired, run the above example as follows: +If the detailed output from trivy is desired, run the above example as follows: ``` $ python3 scan.py detail nginx:latest @@ -92,14 +96,11 @@ $ python3 scan.py cluster Experiment mode will scan all images of the official `library` registry to obtain an overview of the security status of official images. -Be aware that this -mode will use a large amount of storage and despite cleaning up, some data will -remain. As of mid April 2020 it can consume ~80 GB and only clean up ~30 GB. - - Here are some sample results, limited to 'Low' vulnerabilities or higher. +This mode offers two options that can be set via environment variables: -- Total images: 160 -- Images scanned successfully: 148 -- At least one Low: 108 (73%) -- At least one Medium: 39 (26%) -- At least one High: 6 (4%) +- `CIPOLICE_TIMESTAMPED_OUTPUT` + - `False` (default) - write output files without timestamp + - `True` - write output files with date timestamp (e.g. 
`2021-01-01-max.json`) +- `CIPOLICE_OUTPUT_PATH` + - `.` (default) - write output files to current directory + - `` - path to write output files to diff --git a/scanning/scan.py b/scanning/scan.py index 60d07ec..5b87db4 100644 --- a/scanning/scan.py +++ b/scanning/scan.py @@ -1,11 +1,12 @@ +from typing import Counter +from datetime import datetime +import os import requests import pika import json import subprocess import sys import find -import calendar -import time def get_names(): @@ -38,6 +39,7 @@ def oc_find(): print(images) return(images) + def push(image, tag): subprocess.call(f'docker tag {image} localhost:5000/{image}-test', shell=True) subprocess.call(f'docker push localhost:5000/{image}-test', shell=True) @@ -45,31 +47,18 @@ def push(image, tag): return json.loads(p.stdout) -def pull(image, tag): - subprocess.call(f'docker pull {image}:{tag}', shell=True) - p = push(image, tag) - """ - try: - images = oc_find() - if f'{image}:{tag}' in images: - print(f"Warning, image {image} is currently in use") - else: - print("Scanned image has not been deployed to the cluster") - except: - print("Warning, cannot check cluster status") - """ - return p +def scan(image, tag): + p = subprocess.run(f'TRIVY_NEW_JSON_SCHEMA=true trivy -q image -f json {image}:{tag}', shell=True, stdout=subprocess.PIPE) + return json.loads(p.stdout) def scan_all(names): results = { - "Unknown": 0, - "Negligible": 0, - "Low": 0, - "Medium": 0, - "High": 0, - "Critical": 0, - "Defcon1": 0 + "UNKNOWN": 0, + "LOW": 0, + "MEDIUM": 0, + "HIGH": 0, + "CRITICAL": 0, } results_avg = {} results_conv = {} @@ -77,99 +66,96 @@ def scan_all(names): errors = 0 for image in names: try: - result = check(image, 'latest','pull') + print(f"checking first image: {image}") + result = check(image, 'latest') print(result) results_all[image] = result if result[0] == 1: - results['Unknown'] += 1 + results['UNKNOWN'] += 1 if result[0] == 2: - results['Negligible'] += 1 + results['LOW'] += 1 if result[0] == 3: - 
results['Low'] += 1 + results['MEDIUM'] += 1 if result[0] == 4: - results['Medium'] += 1 + results['HIGH'] += 1 if result[0] == 5: - results['High'] += 1 - if result[0] == 6: - results['Critical'] += 1 - if result[0] == 7: - results['Defcon1'] += 1 + results['CRITICAL'] += 1 results_avg[image] = result[1] results_conv[image] = result[2] - except: + except Exception as e: + print(e) errors += 1 print(results) print(results_avg) print(results_conv) print (f'Errors: {errors}') print(len(names)) - with open('max.json', 'w') as f: + + timestamped = os.getenv('CIPOLICE_TIMESTAMPED_OUTPUT', 'False') + timestamp = '' + if timestamped == 'True': + timestamp = datetime.today().strftime('%Y-%m-%d-') + + output_path = os.getenv('CIPOLICE_OUTPUT_PATH', '.') + + with open(f'{output_path}/{timestamp}max.json', 'w') as f: json.dump(results, f, indent=2) - with open('avg.json', 'w') as f: + with open(f'{output_path}/{timestamp}avg.json', 'w') as f: json.dump(results_avg, f, indent=2) - with open('conv.json', 'w') as f: + with open(f'{output_path}/{timestamp}conv.json', 'w') as f: json.dump(results_conv, f, indent=2) return results_all -def check(image, tag, mode): - if mode == 'pull': - result = pull(image, tag) - elif mode == 'push': - result = push(image, tag) +def check(image, tag): + result = scan(image, tag) tilmax = 0 tilavg = 0 - if 'Defcon1' in result['Vulnerabilities']: - tilmax = 7 - elif 'Critical' in result['Vulnerabilities']: - tilmax = 6 - elif 'High' in result['Vulnerabilities']: - tilmax = 5 - elif 'Medium' in result['Vulnerabilities']: - tilmax = 4 - elif 'Low' in result['Vulnerabilities']: - tilmax = 3 - elif 'Negligible' in result['Vulnerabilities']: - tilmax = 2 - elif 'Unknown' in result['Vulnerabilities']: - tilmax = 1 - else: - tilmax = 0 - sum = 0 + summary = Counter() count = 0 - if 'Defcon1' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Defcon1'])*7 - count += len(result['Vulnerabilities']['Defcon1']) - if 'Critical' in 
result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Critical'])*6 - count += len(result['Vulnerabilities']['Critical']) - if 'High' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['High'])*5 - count += len(result['Vulnerabilities']['High']) - if 'Medium' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Medium'])*4 - count += len(result['Vulnerabilities']['Medium']) - if 'Low' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Low'])*3 - count += len(result['Vulnerabilities']['Low']) - if 'Negligible' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Negligible'])*2 - count += len(result['Vulnerabilities']['Negligible']) - if 'Unknown' in result['Vulnerabilities']: - sum += len(result['Vulnerabilities']['Unknown']) - count += len(result['Vulnerabilities']['Unknown']) + if 'Results' in result: + for res in result['Results']: + if 'Vulnerabilities' in res: + for vuln in res['Vulnerabilities']: + count += 1 + if 'CRITICAL' in vuln['Severity']: + tilmax = 5 if tilmax <= 5 else tilmax + summary['CRITICAL'] += 1 + elif 'HIGH' in vuln['Severity']: + tilmax = 4 if tilmax <= 4 else tilmax + summary['HIGH'] += 1 + elif 'MEDIUM' in vuln['Severity']: + tilmax = 3 if tilmax <= 3 else tilmax + summary['MEDIUM'] += 1 + elif 'LOW' in vuln['Severity']: + tilmax = 2 if tilmax <= 2 else tilmax + summary['LOW'] += 1 + elif 'UNKNOWN' in vuln['Severity']: + tilmax = 1 if tilmax <= 1 else tilmax + summary['UNKNOWN'] += 1 + sum = 0 + sum += summary['CRITICAL']*5 + sum += summary['HIGH']*4 + sum += summary['MEDIUM']*3 + sum += summary['LOW']*2 + sum += summary['UNKNOWN']*1 + + if count == 0: + # no vulnarbilites found + return [0, 0, 0] + tilavg = sum/count - print(count) - itlconv = min([tilavg + count/50, 7]) + print(summary) + itlconv = min([tilavg + count/50, 5]) return [tilmax, tilavg, itlconv] def detail(image, tag): - result = pull(image, tag) - print(json.dumps(result['Vulnerabilities'], 
indent=2)) + result = scan(image, tag) + print(json.dumps(result['Results'], indent=2)) with open('output.json', 'w') as f: - json.dump(result['Vulnerabilities'], f, indent=2) + json.dump(result['Results'], f, indent=2) choice = input("Manually whitelist image? [y/n]") if choice == 'y': message = {'image': image, 'override': True} @@ -195,15 +181,17 @@ def detail(image, tag): except: mode = 'http' if len(sys.argv) < 2: - print("Usage: scan.py ") + print("Usage: scan.py ") else: if sys.argv[1] == 'experiment': while True: - flag = input('This option will use a lot of storage and some data cannot be automatically cleaned up. Continue? [y/n]') + #flag = input('This option will use a lot of storage and some data cannot be automatically cleaned up. Continue? [y/n]') + flag = 'y' if flag == 'y': + # TODO enable get names again names = get_names() scan_all(names) - cleanup(names) + #cleanup(names) break elif flag == 'n': break @@ -226,24 +214,8 @@ def detail(image, tag): message = {"image": key, "level": value[0], "avg": value[1]} print(message) requests.post('http://localhost:10080', json=message) - elif sys.argv[1] == 'pull' and len(sys.argv) == 3: - result = check(sys.argv[2].split(':')[0], sys.argv[2].split(':')[1], 'pull') - if mode == 'rmq': - message = f'{{"image": "{sys.argv[2]}", "level": {result}}}' - print(message) - connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) - channel = connection.channel() - channel.queue_declare(queue='hello') - channel.basic_publish(exchange='', - routing_key='hello', - body=message) - connection.close() - elif mode == 'http': - message = {"image": sys.argv[2], "level": result[0], "avg": result[1]} - print(message) - requests.post('http://localhost:10080', json=message) - elif sys.argv[1] == 'push' and len(sys.argv) == 3: - result = check(sys.argv[2].split(':')[0], sys.argv[2].split(':')[1], 'push') + elif sys.argv[1] == 'scan' and len(sys.argv) == 3: + result = check(sys.argv[2].split(':')[0], 
sys.argv[2].split(':')[1]) if mode == 'rmq': message = f'{{"image": "{sys.argv[2]}", "level": {result}}}' print(message) @@ -272,4 +244,4 @@ def detail(image, tag): else: - print("Usage: scan.py ") + print("Usage: scan.py ")