Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions docs/content/en/connecting_your_tools/parsers/file/openvas.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,39 @@
title: "OpenVAS Parser"
toc_hide: true
---
You can either upload the exported results of an OpenVAS Scan in a .csv or .xml format.
You can upload the results of an OpenVAS/Greenbone report in either .csv or .xml format.

### Sample Scan Data
Sample OpenVAS scans can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/openvas).

### Default Deduplication Hashcode Fields
By default, DefectDojo identifies duplicate Findings using these [hashcode fields](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/):
### Parser versions
The OpenVAS parser has two versions: Version 2 and the legacy version (Version 1). Only Version 2 should be used going forward, and the rest of this documentation assumes Version 2.

Version 2 comes with a number of improvements:
- Use of a hash code algorithm for deduplication
- Increased consistency in parsing between the XML and CSV parsers.
- Combined findings where the only differences are in fields that cannot be hashed due to inconsistent values between scans (e.g. fields containing timestamps or packet IDs). This prevents duplicates if the vulnerability is found multiple times on the same endpoint.
- Increased parser value coverage
- Heuristic for fix_available detection
- Updated mapping to DefectDojo fields compared to version 1.

### Deduplication Algorithm
Default Deduplication Hashcode Fields:
By default, DefectDojo Parser V2 identifies duplicate findings using the following [hashcode fields](https://docs.defectdojo.com/en/working_with_findings/finding_deduplication/about_deduplication/):

- title
- cwe
- line
- file path
- description
- severity
- vuln_id_from_tool
- endpoints

The legacy version (version 1) uses the legacy deduplication algorithm.

### CSV and XML differences and similarities
The parser attempts to parse XML and CSV files in a similar way. However, this is not always possible. The following lists the differences between the parsers:

- EPSS scores and percentiles are only available in CSV format.
- CVSS vectors are only available in the XML format.
- The CVSS score will always be reported as CVSS v3 in the CSV parser.
- The references in the CSV parser will never contain URLs.

If no supported CVSS version is detected, the score (if present) is registered as a CVSS v3 score, even if this is incorrect.
3 changes: 3 additions & 0 deletions dojo/settings/settings.dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -1363,6 +1363,7 @@ def saml2_attrib_map_format(din):
"Qualys Hacker Guardian Scan": ["title", "severity", "description"],
"Cyberwatch scan (Galeax)": ["title", "description", "severity"],
"Cycognito Scan": ["title", "severity"],
"OpenVAS Parser v2": ["title", "severity", "vuln_id_from_tool", "endpoints"],
}

# Override the hardcoded settings here via the env var
Expand Down Expand Up @@ -1434,6 +1435,7 @@ def saml2_attrib_map_format(din):
"HCL AppScan on Cloud SAST XML": True,
"AWS Inspector2 Scan": True,
"Cyberwatch scan (Galeax)": True,
"OpenVAS Parser v2": True,
}

# List of fields that are known to be usable in hash_code computation)
Expand Down Expand Up @@ -1620,6 +1622,7 @@ def saml2_attrib_map_format(din):
"Red Hat Satellite": DEDUPE_ALGO_HASH_CODE,
"Qualys Hacker Guardian Scan": DEDUPE_ALGO_HASH_CODE,
"Cyberwatch scan (Galeax)": DEDUPE_ALGO_HASH_CODE,
"OpenVAS Parser v2": DEDUPE_ALGO_HASH_CODE,
}

# Override the hardcoded settings here via the env var
Expand Down
7 changes: 6 additions & 1 deletion dojo/tools/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,12 @@ def requires_tool_type(scan_type):
module = import_module(f"dojo.tools.{module_name}.parser")
for attribute_name in dir(module):
attribute = getattr(module, attribute_name)
if isclass(attribute) and attribute_name.lower() == module_name.replace("_", "") + "parser":
# Allow parser class names with optional v[number] suffix (e.g., OpenVASParser, OpenVASParserV2)
expected_base = module_name.replace("_", "") + "parser"
if isclass(attribute) and (
attribute_name.lower() == expected_base or
re.match(rf"^{re.escape(expected_base)}v\d+$", attribute_name.lower())
):
register(attribute)
except:
logger.exception("failed to load %s", module_name)
24 changes: 22 additions & 2 deletions dojo/tools/openvas/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from dojo.tools.openvas.csv_parser import OpenVASCSVParser
from dojo.tools.openvas.xml_parser import OpenVASXMLParser
from dojo.tools.openvas.parser_v1.csv_parser import OpenVASCSVParser
from dojo.tools.openvas.parser_v1.xml_parser import OpenVASXMLParser
from dojo.tools.openvas.parser_v2.csv_parser import get_findings_from_csv
from dojo.tools.openvas.parser_v2.xml_parser import get_findings_from_xml


class OpenVASParser:
Expand All @@ -18,3 +20,21 @@ def get_findings(self, filename, test):
if str(filename.name).endswith(".xml"):
return OpenVASXMLParser().get_findings(filename, test)
return None


class OpenVASParserV2:

    """Version 2 entry point for importing OpenVAS/Greenbone reports (CSV or XML)."""

    def get_scan_types(self):
        return ["OpenVAS Parser v2"]

    def get_label_for_scan_types(self, scan_type):
        return scan_type

    def get_description_for_scan_types(self, scan_type):
        return "Import CSV or XML output of Greenbone OpenVAS report."

    def get_findings(self, file, test):
        """Dispatch to the CSV or XML parser based on the file extension; None otherwise."""
        filename = str(file.name)
        if filename.endswith(".csv"):
            return get_findings_from_csv(file, test)
        return get_findings_from_xml(file, test) if filename.endswith(".xml") else None
Empty file.
Empty file.
112 changes: 112 additions & 0 deletions dojo/tools/openvas/parser_v2/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import hashlib
from dataclasses import dataclass

from dojo.models import Endpoint, Finding


@dataclass
class OpenVASFindingAuxData:

    """Dataclass to contain all information added later to a finding"""

    # Reference URLs/identifiers, rendered as a bullet list into Finding.references.
    references: list[str]
    # OpenVAS "summary" text, appended to the finding description.
    summary: str = ""
    # Quality of Detection value, appended to the finding description.
    qod: str = ""
    # Raw OpenVAS result text, mapped (fenced) into Finding.steps_to_reproduce.
    openvas_result: str = ""
    # Used as cvssv3_score only when no CVSS version could be detected.
    fallback_cvss_score: float | None = None


def setup_finding(test) -> tuple[Finding, OpenVASFindingAuxData]:
    """Create a blank finding (with one empty endpoint) and its auxiliary-data container."""
    finding = Finding(
        test=test,
        dynamic_finding=True,
        static_finding=False,
        severity="Info",
        nb_occurences=1,
        cwe=None,
    )
    finding.unsaved_vulnerability_ids = []
    finding.unsaved_endpoints = [Endpoint()]
    return finding, OpenVASFindingAuxData(references=[])


def is_valid_severity(severity: str) -> bool:
    """Return True if *severity* is one of the DefectDojo severity labels."""
    return severity in {"Info", "Low", "Medium", "High", "Critical"}


def cleanup_openvas_text(text: str) -> str:
    """Remove unnecessary DefectDojo newlines (newline followed by a space becomes a space)."""
    return " ".join(text.split("\n "))


def escape_restructured_text(text: str) -> str:
    """Wrap *text* in a code fence so reStructuredText symbols are not interpreted.

    OpenVAS likes to include markdown-like tables in some fields; DefectDojo
    renders descriptions as reStructuredText, which causes them to display
    incorrectly. Any fences already present in the text are stripped first so
    they cannot terminate the wrapping fence early.
    """
    # Fix: the original performed this identical replace twice; once suffices.
    text = text.replace("```", "")
    return f"```\n{text}\n```"


def postprocess_finding(finding: Finding, aux_info: OpenVASFindingAuxData):
    """Update finding with AuxData content"""
    # Raw result text goes into steps_to_reproduce, fenced so reST markup is not interpreted.
    if aux_info.openvas_result:
        finding.steps_to_reproduce = escape_restructured_text(cleanup_openvas_text(aux_info.openvas_result))
    # Append summary and Quality-of-Detection details to the description when present.
    if aux_info.summary:
        finding.description += f"\n**Summary**: {cleanup_openvas_text(aux_info.summary)}"
    if aux_info.qod:
        finding.description += f"\n**QoD**: {aux_info.qod}"
    if len(aux_info.references) > 0:
        finding.references = "\n".join(["- " + ref for ref in aux_info.references])
    # fallback in case no cvss version is detected: register the raw score as CVSS v3
    if aux_info.fallback_cvss_score and not finding.cvssv3_score and not finding.cvssv4_score:
        finding.cvssv3_score = aux_info.fallback_cvss_score

    # heuristic for fix_available detection: look for vendor-update phrases in the mitigation text
    if finding.mitigation:
        search_terms = ["Update to version", "The vendor has released updates"]
        if any(text in finding.mitigation for text in search_terms):
            finding.fix_available = True


def deduplicate(dupes: dict[str, Finding], finding: Finding):
    """Combine multiple openvas findings into one defectdojo finding with potentially multiple endpoints"""
    finding_hash = gen_finding_hash(finding)

    if finding_hash not in dupes:
        dupes[finding_hash] = finding
    else:
        # OpenVAS does not combine multiple findings into one,
        # e.g. if 2 vulnerable java runtimes are present on the host this is reported as 2 findings.
        # The only way to differentiate these findings when they are based on the same vulnerability
        # is the data mapped to steps_to_reproduce.
        # However, we cannot hash this field as it can contain data that changes between scans,
        # e.g. timestamps or packet ids.
        # We therefore combine them into one defectdojo finding because duplicates during reimport cause
        # https://github.com/DefectDojo/django-DefectDojo/issues/3958
        org = dupes[finding_hash]
        org.nb_occurences += 1
        # Append the new result text under an endpoint header; on the first merge,
        # retrofit a header onto the original text so every section is labelled.
        if org.steps_to_reproduce != finding.steps_to_reproduce:
            if "Endpoint" in org.steps_to_reproduce:
                org.steps_to_reproduce += "\n---------------------------------------\n"
                org.steps_to_reproduce += f"**Endpoint**: {finding.unsaved_endpoints[0].host}\n"
                org.steps_to_reproduce += finding.steps_to_reproduce
            else:
                tmp = org.steps_to_reproduce
                org.steps_to_reproduce = f"**Endpoint**: {org.unsaved_endpoints[0].host}\n"
                org.steps_to_reproduce += tmp

        # combine identical findings on different hosts into one with multiple hosts
        endpoint = finding.unsaved_endpoints[0]
        if endpoint not in org.unsaved_endpoints:
            org.unsaved_endpoints += finding.unsaved_endpoints


def gen_finding_hash(finding: Finding) -> str:
    """Generate a hash used to deduplicate findings within the current report."""
    key = "|".join(
        [
            str(finding.unsaved_endpoints[0]),
            finding.title,
            finding.vuln_id_from_tool,
            finding.severity,
        ],
    )
    return hashlib.sha256(key.encode("utf-8")).hexdigest()
Loading