Skip to content

Commit 04756bc

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 43f2020 commit 04756bc

File tree

7 files changed

+353
-19
lines changed

7 files changed

+353
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
# This script is dynamically triggered by "VisualizationReports.sh" when report "All" or "Visualization" is enabled.
4+
# It is designed as an entry point and delegates the execution to the dedicated "anomalyDetectionGraphVisualization.sh" script that does the "heavy lifting".
5+
6+
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
7+
8+
# Requires anomalyDetectionGraphVisualization.sh
9+
10+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
# REPORTS_DIRECTORY is not used directly in this script. It is set here so that the sourced script sees a value.
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get the directory that contains this script if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
21+
# echo "anomalyDetectionCsv: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
22+
23+
# Get the "graphs" directory by taking the path of this script and selecting "graphs".
24+
ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/graphs"} # Contains everything (scripts, queries, templates) to create the graph visualizations for anomaly detection
25+
26+
# Delegate the execution to the responsible script.
# It is sourced (not executed), so it runs in this shell and sees the variables defined above.
27+
source "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Anomaly Detection Graphs: Find the node with the given "Bottleneck" rank including its direct incoming and outgoing dependencies and output them in Graphviz format.
// Query parameters: $projection_node_label, $projection_node_rank, $projection_language.
// Returns one row per distinct Graphviz line in the column "graphVizOutputLine".
// NOTE(review): Steps 3, 4 and 5 use plain MATCH (not OPTIONAL MATCH). If one of them finds nothing, the whole query returns no rows, even though the central node exists. Confirm that this is intended.
2+
3+
// Step 1: Query overall statistics (the maximum weight) for later normalization of the pen width
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
// Step 2: Query selected central node
9+
MATCH (central)
10+
WHERE $projection_node_label IN labels(central)
11+
AND central.anomalyBottleneckRank = toInteger($projection_node_rank)
12+
WITH maxWeight
13+
,central
14+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bottleneck: " AS graphLabel
15+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
16+
,[] AS graphVizOutput
// Insert a Graphviz line break (\n) after every "." and "/" so that long names wrap inside the central node.
17+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
18+
WITH *, targetNameSplit + "\\n(bottleneck #" + central.anomalyBottleneckRank + ")" AS centralNodeLabel
// graphVizOutput accumulates the Graphviz lines: first the graph title, then the label of the central node.
19+
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
20+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
21+
// Step 3: Query direct incoming dependencies to the central node
22+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
23+
WHERE $projection_node_label IN labels(source)
24+
AND source.outgoingDependencies > 0
// Keep at most 30 incoming edges, sorted by "dependency.weight" (not by the coalesced weight used for the label below).
25+
ORDER BY dependency.weight DESC, source.name ASC
26+
LIMIT 30
27+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Pen width: weight relative to maxWeight, scaled by 2.5 with an offset of 0.4, rounded with precision 1.
28+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
29+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
30+
WITH maxWeight
31+
,central
32+
,graphVizOutput
33+
,collect(source) AS incomingDependencyNodes
34+
,collect("\"" + source.name + "\" -> central [" + edgeAttributes + "];") AS directInEdges
35+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
36+
// Step 4: Query direct outgoing dependencies from the central node
// Note: "source" here names the node the central node depends on (the target of the outgoing edge).
37+
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
38+
WHERE $projection_node_label IN labels(source)
39+
AND source.incomingDependencies > 0
40+
ORDER BY dependency.weight DESC, source.name ASC
41+
LIMIT 30
42+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
43+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
44+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
45+
WITH maxWeight
46+
,central
47+
,graphVizOutput
48+
,incomingDependencyNodes
49+
,collect(source) AS outgoingDependencyNodes
50+
,collect("central -> \"" + source.name + "\" [" + edgeAttributes + "];") AS directOutEdges
51+
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
52+
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
53+
// Step 5: Query dependencies between direct dependencies outside the central node
54+
UNWIND directDependentNodes AS directDependentNode
55+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
56+
WHERE anotherDirectDependentNode IN directDependentNodes
57+
AND anotherDirectDependentNode <> directDependentNode
58+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
59+
WITH maxWeight
60+
,central
61+
,graphVizOutput
62+
,directDependentNode
63+
,dependency
64+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
65+
LIMIT 60
66+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
67+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
68+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.2" AS edgeAttributes
69+
WITH *, "\"" + directDependentNode.name + "\" -> \"" + firstLinkedDependentNode.name + "\"" AS directDependenciesEdge
// "WITH *" keeps all row variables as grouping keys, so every row collects a single edge line here.
70+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
71+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
// Flatten the per-row lists into single lines. DISTINCT removes the lines that are repeated in every row.
72+
UNWIND graphVizOutput AS graphVizOutputLine
73+
RETURN DISTINCT graphVizOutputLine
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// This is a GraphViz dot template file for the visualization of anomaly archetype graphs with a selected central node.
2+
// The main part of the template is marked by the comments "Begin-Template" and "End-Template".
3+
// It also contains a simple example graph.
4+
//
5+
strict digraph top_central_template {
6+
//Begin-Template
7+
graph [layout = "fdp"; start = "7"; splines = "spline"; beautify = true;];
8+
graph [fontname = "Helvetica,Arial,sans-serif"; labelloc = "t";];
9+
node [colorscheme = "blues9"; color = 6; fillcolor = 3;]; # Alternative: color = "0.58 0.75 0.75"; fillcolor = "0.58 0.15 0.99"
10+
edge [colorscheme = "blues9"; color = 7; ]; # Alternative: color = "0.58 0.75 0.85";
11+
node [fontsize = 8; style = "filled"; margin = "0.05,0.05"];
12+
edge [fontsize = 4; arrowsize = "0.4";];
13+
14+
// The node named "central" is the selected node. It is emphasized by shape, font size, color and pen width.
central [shape = "doublecircle"; margin = "0.00001,0.00001";];
15+
central [fontsize = 10;];
16+
central [colorscheme = "bugn9"; color = 6; fillcolor = 4; penwidth = 3;]; # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
17+
18+
// The note-shaped "limit_hint" node is attached to the central node.
limit_hint [colorscheme = "bugn9"; color = 6; fillcolor = 4; penwidth = 2;] # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
19+
limit_hint [shape = "note"; fontsize = 10]
20+
limit_hint [label = "limited\nnode count";]
21+
limit_hint -> central [dir = "back"; arrowtail = "inv"]; // Signals that the number of edges might have been limited
22+
23+
//End-Template
// Example graph (outside of the template part): two nodes "A" and "B" that depend on the central node.
24+
"A" -> "central" [penwidth = 1.0; label = 1;];
25+
"A" -> "B" [penwidth = 3.0; label = 4;];
26+
"B" -> "central" [penwidth = 2.0; label = 2;];
27+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Anomaly Detection Graphs: Find the node with the given "Hub" rank including its direct incoming dependencies and output them in Graphviz format.
// Query parameters: $projection_node_label, $projection_node_rank, $projection_language.
// Returns one row per distinct Graphviz line in the column "graphVizOutputLine".
// NOTE(review): Steps 3 and 4 use plain MATCH (not OPTIONAL MATCH). If one of them finds nothing, the whole query returns no rows, even though the central node exists. Confirm that this is intended.
2+
3+
// Step 1: Query overall statistics (the maximum weight) for later normalization of the pen width
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
// Step 2: Query selected central node
9+
MATCH (central)
10+
WHERE $projection_node_label IN labels(central)
11+
AND central.anomalyHubRank = toInteger($projection_node_rank)
12+
WITH maxWeight
13+
,central
14+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Hub: " AS graphLabel
15+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
16+
,[] AS graphVizOutput
// Insert a Graphviz line break (\n) after every "." and "/" so that long names wrap inside the central node.
17+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
18+
WITH *, targetNameSplit + "\\n(hub #" + central.anomalyHubRank + ")" AS centralNodeLabel
// graphVizOutput accumulates the Graphviz lines: first the graph title, then the label of the central node.
19+
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
20+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
21+
// Step 3: Query direct incoming dependencies to the central node
22+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
23+
WHERE $projection_node_label IN labels(source)
24+
AND source.outgoingDependencies > 0
// Keep at most 70 incoming edges, sorted by "dependency.weight" (not by the coalesced weight used for the label below).
25+
ORDER BY dependency.weight DESC, source.name ASC
26+
LIMIT 70
27+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Pen width: weight relative to maxWeight, scaled by 2.5 with an offset of 0.4, rounded with precision 1.
28+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
29+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
30+
WITH maxWeight
31+
,central
32+
,graphVizOutput
33+
,collect(source) AS directDependentNodes
34+
,collect("\"" + source.name + "\" -> central [" + edgeAttributes + "];") AS directInEdges
35+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
36+
// Step 4: Query dependencies between direct dependencies outside the central node
37+
UNWIND directDependentNodes AS directDependentNode
38+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
39+
WHERE anotherDirectDependentNode IN directDependentNodes
40+
AND anotherDirectDependentNode <> directDependentNode
41+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
42+
WITH maxWeight
43+
,central
44+
,graphVizOutput
45+
,directDependentNode
46+
,dependency
47+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
48+
LIMIT 140
49+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
50+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
// These secondary edges get no "label" attribute, only "weight" and "penwidth".
51+
WITH *, "weight=" + weight + "; penwidth=0.2" AS edgeAttributes
52+
WITH *, "\"" + directDependentNode.name + "\" -> \"" + firstLinkedDependentNode.name + "\"" AS directDependenciesEdge
// "WITH *" keeps all row variables as grouping keys, so every row collects a single edge line here.
53+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
54+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
// Flatten the per-row lists into single lines. DISTINCT removes the lines that are repeated in every row.
55+
UNWIND graphVizOutput AS graphVizOutputLine
56+
RETURN DISTINCT graphVizOutputLine
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
#!/usr/bin/env bash
2+
3+
# Executes selected anomaly detection Cypher queries for GraphViz visualization.
4+
# Visualizes top ranked anomaly archetypes.
5+
# Requires an already running Neo4j graph database with already scanned and analyzed artifacts.
6+
# The reports (svg files and, in a "graphviz" sub directory, gv files) will be written into the sub directory reports/anomaly-detection/{language}_{codeUnit}/GraphVisualizations.
7+
8+
# Requires executeQueryFunctions.sh, visualizeQueryResults.sh, cleanupAfterReportGeneration.sh
9+
10+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get the directory that contains this script if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )}
21+
#echo "anomalyDetectionGraphVisualization: ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR}"
22+
23+
# Get the "scripts" directory by taking the path of this script, going three directories up and selecting "scripts".
24+
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_GRAPHS_DIR}/../../../scripts"} # Repository directory containing the shell scripts
25+
# echo "anomalyDetectionGraphVisualization: SCRIPTS_DIR=${SCRIPTS_DIR}"
26+
27+
# Get the "scripts/visualization" directory.
28+
VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR:-"${SCRIPTS_DIR}/visualization"} # Repository directory containing the shell scripts for visualization
29+
# echo "anomalyDetectionGraphVisualization: VISUALIZATION_SCRIPTS_DIR=${VISUALIZATION_SCRIPTS_DIR}"
30+
31+
# Name of the sub directory (inside the report directory) that contains the Markdown reference file.
MARKDOWN_INCLUDES_DIRECTORY=${MARKDOWN_INCLUDES_DIRECTORY:-"includes"}
32+
ANOMALY_DETECTION_TOP_N_GRAPHS=${ANOMALY_DETECTION_TOP_N_GRAPHS:-5} # Number of top ranked graphs to visualize per query for anomaly detection.
33+
34+
# Define functions to execute cypher queries from within a given file
35+
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"
36+
37+
# Runs a parametrized query once per rank, converts each result into GraphViz format and creates a graph visualization.
# Outputs (at most) ANOMALY_DETECTION_TOP_N_GRAPHS indexed files (for report_name="TopHub" then TopHub1, TopHub2,...)
# with a focused visualization of one selected node and its surroundings.
# The generation stops at the first rank for which the query returns no result.
#
# Several reports (e.g. "TopHub" and "TopBottleneck") share the same "{language}_{nodeLabel}/GraphVisualizations"
# directory. Therefore only the previous results of the given report_name are removed before the generation starts.
# Removing the whole directory would delete the images of the other reports that are already referenced in the Markdown file.
#
# Required Parameters:
# - report_name=...
#   Name of the query and then also the resulting visualization file.
# - template_name=...
#   Name of the GraphViz template gv file.
# - projection_language=...
#   Name of the associated programming language. Examples: "Java", "Typescript"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
#
# Global variables (read):
#   FULL_REPORT_DIRECTORY, MARKDOWN_REFERENCE_FILE, ANOMALY_DETECTION_TOP_N_GRAPHS,
#   ANOMALY_DETECTION_GRAPHS_DIR, SCRIPTS_DIR, VISUALIZATION_SCRIPTS_DIR
# Requires the functions "extractQueryParameter" and "execute_cypher" to be defined by the caller's environment.
create_graph_visualization() {
    local nodeLabel
    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )

    local language
    language=$( extractQueryParameter "projection_language" "${@}" )

    local report_name
    report_name=$( extractQueryParameter "report_name" "${@}" )

    local template_name
    template_name=$( extractQueryParameter "template_name" "${@}" )

    echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Creating ${language} ${nodeLabel} ${report_name} visualizations with template ${template_name}..."

    local detail_report_directory_name="${language}_${nodeLabel}"
    local detail_report_directory="${FULL_REPORT_DIRECTORY}/${detail_report_directory_name}/GraphVisualizations"

    # Remove only the (possibly stale) indexed results of this report, e.g. TopHub1.svg, graphviz/TopHub1.gv.
    # Results of other reports within the same directory are left untouched.
    # Patterns without a match are passed on literally and are ignored because of "-f".
    rm -rf -- "${detail_report_directory}/${report_name}"[0-9]* "${detail_report_directory}/graphviz/${report_name}"[0-9]*
    mkdir -p "${detail_report_directory}"

    local index
    local resultFileName
    local queryResultFile
    for ((index=1; index<=ANOMALY_DETECTION_TOP_N_GRAPHS; index++)); do
        # Query Graph data
        resultFileName="${detail_report_directory}/${report_name}${index}"
        queryResultFile="${resultFileName}.csv"
        # A failing query is tolerated on purpose: it leads to an empty result file that is removed below.
        execute_cypher "${ANOMALY_DETECTION_GRAPHS_DIR}/${report_name}.cypher" "${@}" "projection_node_rank=${index}" > "${queryResultFile}" || true

        # Remove empty files
        # Note: Afterwards, detail_report_directory might be deleted as well.
        # In that case the image generation is finished and the loop needs to be terminated.
        source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${detail_report_directory}"
        # Stop generation as soon as the first query result is empty or the directory is deleted.
        if [ ! -f "${queryResultFile}" ] ; then
            break;
        fi

        # Generate svg image using GraphViz
        source "${VISUALIZATION_SCRIPTS_DIR}/visualizeQueryResults.sh" "${queryResultFile}" --template "${ANOMALY_DETECTION_GRAPHS_DIR}/${template_name}.template.gv"

        # Clean up after graph visualization image generation:
        rm -rf "${queryResultFile}" # Remove query result
        # Collect graphviz files in a "graphviz" sub directory
        mkdir -p "${detail_report_directory}/graphviz"
        mv -f "${resultFileName}.gv" "${detail_report_directory}/graphviz"

        # Create visualization reference Markdown file to be embeddable in main Markdown report
        # The headline is written once per report, right before its first image.
        if [ "${index}" == "1" ]; then
            {
                echo ""
                echo "##### ${language} ${nodeLabel} - ${report_name} Graph Visualizations"
                echo ""
            } >> "${MARKDOWN_REFERENCE_FILE}"
        fi
        echo "![${report_name} ${index}](./${detail_report_directory_name}/GraphVisualizations/${report_name}${index}.svg)" >> "${MARKDOWN_REFERENCE_FILE}"
    done
}
104+
105+
# Creates the graph visualizations of all anomaly archetype reports for one kind of node.
# For every report the query is run, its results are output in GraphViz format and visualized.
#
# Required Parameters:
# - projection_language=...
#   Name of the associated programming language. Examples: "Java", "Typescript"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
anomaly_detection_graph_visualization() {
    # All archetype reports share the same GraphViz template. Their order defines the order of generation.
    local archetype_report
    for archetype_report in "TopHub" "TopBottleneck"; do
        create_graph_visualization "report_name=${archetype_report}" "template_name=TopCentral" "${@}"
    done
}
116+
117+
118+
# Create report directory
119+
REPORT_NAME="anomaly-detection"
120+
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
121+
mkdir -p "${FULL_REPORT_DIRECTORY}"
122+
# Markdown file that collects the references (image links) to all generated visualizations.
# It is removed up front because "create_graph_visualization" only appends to it.
123+
MARKDOWN_REFERENCE_FILE="${FULL_REPORT_DIRECTORY}/${MARKDOWN_INCLUDES_DIRECTORY}/GraphVisualizationsReference.md"
124+
mkdir -p "${FULL_REPORT_DIRECTORY}/${MARKDOWN_INCLUDES_DIRECTORY}"
125+
rm -rf "${MARKDOWN_REFERENCE_FILE}"
126+
127+
# Query parameter keys for the node label and the programming language of the projection
128+
QUERY_NODE="projection_node_label"
129+
QUERY_LANGUAGE="projection_language"
130+
131+
# -- Detail Reports for each code type -------------------------------
# Each call creates the "TopHub" and "TopBottleneck" visualizations for one node label and language.
132+
133+
anomaly_detection_graph_visualization "${QUERY_NODE}=Artifact" "${QUERY_LANGUAGE}=Java"
134+
anomaly_detection_graph_visualization "${QUERY_NODE}=Package" "${QUERY_LANGUAGE}=Java"
135+
anomaly_detection_graph_visualization "${QUERY_NODE}=Type" "${QUERY_LANGUAGE}=Java"
136+
anomaly_detection_graph_visualization "${QUERY_NODE}=Module" "${QUERY_LANGUAGE}=Typescript"
137+
138+
# ---------------------------------------------------------------
139+
140+
echo "anomalyDetectionGraphVisualization: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."

0 commit comments

Comments
 (0)