Skip to content

Commit 507e43a

Browse files
committed
Add anomaly detection Markdown summary report
1 parent 496e28e commit 507e43a

14 files changed

+586
-0
lines changed

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ anomaly_detection_features() {
7070
# Required Parameters:
7171
# - projection_node_label=...
7272
# Label of the nodes that will be used for the projection. Example: "Package"
73+
# - projection_language=...
74+
# Name of the associated programming language. Default: "Java". Example: "Typescript"
7375
anomaly_detection_queries() {
7476
local nodeLabel
7577
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -99,6 +101,8 @@ anomaly_detection_queries() {
99101
# Required Parameters:
100102
# - projection_node_label=...
101103
# Label of the nodes that will be used for the projection. Example: "Package"
104+
# - projection_language=...
105+
# Name of the associated programming language. Examples: "Java", "Typescript"
102106
anomaly_detection_labels() {
103107
local nodeLabel
104108
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -129,6 +133,8 @@ anomaly_detection_labels() {
129133
# Label of the nodes that will be used for the projection. Example: "Package"
130134
# - projection_weight_property=...
131135
# Name of the node property that contains the dependency weight. Example: "weight"
136+
# - projection_language=...
137+
# Name of the associated programming language. Examples: "Java", "Typescript"
132138
anomaly_detection_csv_reports() {
133139
time anomaly_detection_features "${@}"
134140
time anomaly_detection_queries "${@}"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Anomaly Detection Summary: Summarizes all labelled archetypes by their anomaly score including examples. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label
2+
3+
// Select every node that carries the label passed in as $projection_node_label.
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
// One row per property key of the code unit.
UNWIND keys(codeUnit) AS codeUnitProperty
6+
WITH *
7+
// Keep only property keys of the form 'anomaly<Archetype>Rank'.
WHERE codeUnitProperty STARTS WITH 'anomaly'
8+
AND codeUnitProperty ENDS WITH 'Rank'
9+
WITH *
10+
// Display name: the first non-null of the listed name properties.
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
11+
// Archetype name: the text between the 'anomaly' prefix and the 'Rank' suffix of the property key.
,split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0] AS archetype
12+
// Value stored in the rank property that was matched above.
,codeUnit[codeUnitProperty] AS archetypeRank
13+
,codeUnit.anomalyScore AS anomalyScore
14+
// NOTE(review): 'archetype' is already in scope through WITH * and therefore acts as a grouping key,
// so collect(archetype)[0] presumably yields the same value again - confirm whether this step is needed.
WITH *, collect(archetype)[0] AS archetype
15+
// Highest anomaly score first, so that the examples collected below start with the strongest anomalies.
ORDER BY codeUnit.anomalyScore DESC, archetypeRank ASC, codeUnitName ASC, archetype ASC
16+
WITH archetype
17+
,anomalyScore
18+
// Model status: 'Typical' for a non-positive score, 'Undetermined' when no top feature is stored, otherwise 'Anomalous'.
,CASE WHEN codeUnit.anomalyScore <= 0 THEN 'Typical'
19+
WHEN codeUnit.anomalyTopFeature1 IS NULL THEN 'Undetermined'
20+
ELSE 'Anomalous' END AS modelStatus
21+
,codeUnitName
22+
// One result row per archetype and model status.
RETURN archetype AS `Archetype`
23+
,count(DISTINCT codeUnitName) AS `Count`
24+
,round(max(anomalyScore), 4, 'HALF_UP') AS `Max. Score`
25+
,modelStatus AS `Model Status`
26+
// At most three distinct code unit names, joined with ', '.
,apoc.text.join(collect(DISTINCT codeUnitName)[0..3], ', ') AS `Examples`
27+
ORDER BY modelStatus, archetype, `Max. Score` DESC
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Anomaly Detection DeepDive: Overview of analyzed code units and the number of anomalies detected. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label
2+
3+
// Counts the detected anomalies and the archetype assignments among the code units
// that carry the label passed in as $projection_node_label.
MATCH (unit)
// Only code units with dependency information (incoming or outgoing) are taken into account.
WHERE (unit.outgoingDependencies IS NOT NULL
    OR unit.incomingDependencies IS NOT NULL)
  AND $projection_node_label IN labels(unit)
// sum() skips null values and sign() yields 1 for a positive rank, so each archetype column
// counts the code units with a positive rank for that archetype (assuming ranks are never negative).
RETURN sum(unit.anomalyLabel)                 AS `Anomalies`
      ,sum(sign(unit.anomalyAuthorityRank))   AS `Authorities`
      ,sum(sign(unit.anomalyBottleneckRank))  AS `Bottlenecks`
      ,sum(sign(unit.anomalyBridgeRank))      AS `Bridges`
      ,sum(sign(unit.anomalyHubRank))         AS `Hubs`
      ,sum(sign(unit.anomalyOutlierRank))     AS `Outliers`
    //,collect(unit.name)[0..4]               AS exampleNames
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Anomaly Detection Summary: Overview of all analyzed code units in total, across all node labels. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label (neither is referenced by this query)
2+
3+
// Counts all analyzed code units together with the detected anomalies and archetype assignments.
// No node label filter is applied: every node with dependency information (incoming or outgoing) is included.
MATCH (codeUnit)
WHERE (codeUnit.incomingDependencies IS NOT NULL
    OR codeUnit.outgoingDependencies IS NOT NULL)
// Aggregation without a grouping key: the result is always exactly one row.
// sum() skips null values and sign() yields 1 for a positive rank, so each archetype column
// counts the code units with a positive rank for that archetype (assuming ranks are never negative).
 WITH count(DISTINCT codeUnit)                   AS codeUnitCount
     ,sum(codeUnit.anomalyLabel)                 AS anomalyCount
     ,sum(sign(codeUnit.anomalyAuthorityRank))   AS authorityCount
     ,sum(sign(codeUnit.anomalyBottleneckRank))  AS bottleNeckCount
     ,sum(sign(codeUnit.anomalyBridgeRank))      AS bridgeCount
     ,sum(sign(codeUnit.anomalyHubRank))         AS hubCount
     ,sum(sign(codeUnit.anomalyOutlierRank))     AS outlierCount
   //,collect(codeUnit.name)[0..4]               AS exampleNames
// No ORDER BY: sorting a single result row would have no effect.
RETURN codeUnitCount   AS `Analyzed Units`
      ,anomalyCount    AS `Anomalies`
      ,authorityCount  AS `Authorities`
      ,bottleNeckCount AS `Bottlenecks`
      ,bridgeCount     AS `Bridges`
      ,hubCount        AS `Hubs`
      ,outlierCount    AS `Outliers`
    //,exampleNames
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Anomaly Detection Summary: Overview of analyzed code units and the number of anomalies detected, grouped by abstraction level (the node labels of the code units). Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label (neither is referenced by this query)
2+
3+
// Summarizes analyzed code units with their anomaly and archetype counts per abstraction level,
// where the abstraction level is the comma separated list of the node labels that remain after filtering.
MATCH (unit)
WHERE (unit.incomingDependencies IS NOT NULL
    OR unit.outgoingDependencies IS NOT NULL)
UNWIND labels(unit) AS unitLabel
// Drop labels that start with 'Mark4' as well as the explicitly listed labels.
  WITH unit, unitLabel
 WHERE NOT (unitLabel STARTS WITH 'Mark4'
         OR unitLabel IN ['File', 'Directory', 'ByteCode', 'GenericDeclaration'])
// Re-assemble the remaining labels per code unit.
  WITH unit
      ,collect(unitLabel) AS unitLabels
// Aggregate per distinct label combination.
// sum() skips null values and sign() yields 1 for a positive rank, so each archetype column
// counts the code units with a positive rank for that archetype (assuming ranks are never negative).
RETURN apoc.text.join(unitLabels, ',')        AS `Abstraction Level`
      ,count(DISTINCT unit)                   AS `Units`
      ,sum(unit.anomalyLabel)                 AS `Anomalies`
      ,sum(sign(unit.anomalyAuthorityRank))   AS `Authorities`
      ,sum(sign(unit.anomalyBottleneckRank))  AS `Bottlenecks`
      ,sum(sign(unit.anomalyBridgeRank))      AS `Bridges`
      ,sum(sign(unit.anomalyHubRank))         AS `Hubs`
      ,sum(sign(unit.anomalyOutlierRank))     AS `Outliers`
    //,collect(unit.name)[0..4]               AS exampleNames
 ORDER BY `Anomalies` DESC, `Units` DESC
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Anomaly Detection Summary: Lists the top anomalies (at most 20), the top 3 features that contributed to the decision, and the archetype classification(s) they are assigned to (if available). Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label
2+
3+
// Start with the code units of the requested node label that have a positive anomaly score.
MATCH (codeUnit)
WHERE $projection_node_label IN labels(codeUnit)
  AND codeUnit.anomalyScore > 0
ORDER BY codeUnit.anomalyScore DESC
// One row per property key. Only keys of the form 'anomaly<Archetype>Rank' yield an archetype
// and its rank; every other key yields null for both (CASE without ELSE).
UNWIND keys(codeUnit) AS codeUnitProperty
  WITH codeUnit
      ,CASE WHEN codeUnitProperty STARTS WITH 'anomaly'
             AND codeUnitProperty ENDS WITH 'Rank'
            THEN split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0]
        END AS archetype
      ,CASE WHEN codeUnitProperty STARTS WITH 'anomaly'
             AND codeUnitProperty ENDS WITH 'Rank'
            THEN codeUnit[codeUnitProperty]
        END AS archetypeRank
 ORDER BY codeUnit.anomalyScore DESC, archetypeRank ASC
// Collapse back to one row per code unit. collect() skips the null archetypes, so a code unit
// without any archetype rank is kept with an empty archetype list.
  WITH codeUnit
      ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
      ,apoc.text.join(collect(DISTINCT archetype), ', ') AS archetypes
// Resolve where the code unit is contained in: Java artifact, TypeScript project or parent directory.
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
  WITH *, artifact.name AS artifactName
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
  WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
OPTIONAL MATCH (codeDirectory:File:Directory)-[:CONTAINS]->(codeUnit)
  WITH *, split(replace(codeDirectory.fileName, './', ''), '/')[-2] AS directoryName
// First non-null container name wins; an empty string is used when none was found.
  WITH *, coalesce(artifactName, projectName, directoryName, "") AS projectName
RETURN codeUnitName                                                            AS `Name`
      ,projectName                                                             AS `Contained in`
      ,round(codeUnit.anomalyScore, 4, 'HALF_UP')                              AS `Anomaly Score`
      ,collect(archetypes)[0]                                                  AS `Archetypes`
      // nullif() turns empty feature names and SHAP values of 0.0 into null.
      ,nullif(codeUnit.anomalyTopFeature1, "")                                 AS `Top Feature 1`
      ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue1, 4, 'HALF_UP'), 0.0)  AS `Top Feature 1 SHAP`
      ,nullif(codeUnit.anomalyTopFeature2, "")                                 AS `Top Feature 2`
      ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue2, 4, 'HALF_UP'), 0.0)  AS `Top Feature 2 SHAP`
      ,nullif(codeUnit.anomalyTopFeature3, "")                                 AS `Top Feature 3`
      ,nullif(round(codeUnit.anomalyTopFeatureSHAPValue3, 4, 'HALF_UP'), 0.0)  AS `Top Feature 3 SHAP`
      // The 'Typical' branch cannot occur here because of the anomalyScore > 0 filter above.
      ,CASE WHEN codeUnit.anomalyScore <= 0 THEN 'Typical'
            WHEN codeUnit.anomalyTopFeature1 IS NULL THEN 'Undetermined'
            ELSE 'Anomalous' END                                               AS `Model Status`
// Sort explicitly before limiting: row order is not guaranteed to survive the aggregations above,
// so LIMIT alone could keep an arbitrary 20 anomalies instead of the top 20.
 ORDER BY `Anomaly Score` DESC, `Name` ASC
 LIMIT 20

0 commit comments

Comments
 (0)