Skip to content

Commit 8f86f7a

Browse files
committed
Add normalized PageRank to ArticleRank difference to features
1 parent edf7091 commit 8f86f7a

File tree

5 files changed

+47
-28
lines changed

5 files changed

+47
-28
lines changed

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ anomaly_detection_features() {
6161
# Determine the article rank if not already done
6262
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
6363
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
64+
# Determine the normalized difference between Page Rank and Article Rank if not already done
65+
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
66+
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
6467
}
6568

6669
# Run queries to find anomalies in the graph.

domains/anomaly-detection/anomalyDetectionPython.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ anomaly_detection_features() {
106106
# Determine the article rank if not already done
107107
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
108108
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
109+
# Determine the normalized difference between Page Rank and Article Rank if not already done
110+
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
111+
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
109112
}
110113

111114
# Execute the Python scripts for anomaly detection.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Returns the first node that already has the "centralityPageRankToArticleRankDifference" property (and the normalized rank properties it is derived from), if such a node exists
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.centralityPageRankToArticleRankDifference IS NOT NULL
6+
AND codeUnit.centralityPageRankNormalized IS NOT NULL
7+
AND codeUnit.centralityArticleRankNormalized IS NOT NULL // must match the property name set by AnomalyDetectionFeature-PageToArticleRank-Write.cypher
8+
RETURN codeUnit.name AS shortCodeUnitName
9+
,elementId(codeUnit) AS nodeElementId
10+
,codeUnit.centralityPageRankToArticleRankDifference AS pageToArticleRankDifference
11+
LIMIT 1
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Calculates and writes (amongst others) the "centralityPageRankToArticleRankDifference" property.
2+
3+
MATCH (codeUnit)
4+
WHERE $projection_node_label IN labels(codeUnit)
5+
AND codeUnit.centralityPageRank IS NOT NULL
6+
AND codeUnit.centralityArticleRank IS NOT NULL
7+
WITH collect(codeUnit) AS codeUnits
8+
,min(codeUnit.centralityPageRank) AS minPageRank
9+
,max(codeUnit.centralityPageRank) AS maxPageRank
10+
,min(codeUnit.centralityArticleRank) AS minArticleRank
11+
,max(codeUnit.centralityArticleRank) AS maxArticleRank
12+
UNWIND codeUnits AS codeUnit
13+
WITH *
14+
,CASE WHEN maxPageRank = minPageRank THEN 0.0 ELSE (codeUnit.centralityPageRank - minPageRank) / (maxPageRank - minPageRank) END AS normalizedPageRank // min-max normalization; 0.0 when all values are equal to avoid dividing by zero
15+
,CASE WHEN maxArticleRank = minArticleRank THEN 0.0 ELSE (codeUnit.centralityArticleRank - minArticleRank) / (maxArticleRank - minArticleRank) END AS normalizedArticleRank // same guard as above for Article Rank
16+
WITH *
17+
,normalizedPageRank - normalizedArticleRank AS normalizedPageRankToArticleRankDifference
18+
SET codeUnit.centralityPageRankToArticleRankDifference = normalizedPageRankToArticleRankDifference
19+
,codeUnit.centralityPageRankNormalized = normalizedPageRank
20+
,codeUnit.centralityArticleRankNormalized = normalizedArticleRank
21+
RETURN count(*) AS nodePropertiesWritten

domains/anomaly-detection/labels/AnomalyDetectionArchetypeAuthority.cypher

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,39 +11,22 @@
1111
,min(codeUnit.centralityArticleRank) AS minArticleRank
1212
,max(codeUnit.centralityArticleRank) AS maxArticleRank
1313
,percentileDisc(codeUnit.centralityPageRank, 0.90) AS pageRankThreshold
14+
,percentileDisc(codeUnit.centralityPageRankToArticleRankDifference, 0.90) AS pageToArticleRankDifferenceThreshold
1415
UNWIND codeUnits AS codeUnit
1516
WITH *
16-
WHERE codeUnit.centralityPageRank >= pageRankThreshold
17-
WITH *
18-
,(codeUnit.centralityPageRank - minPageRank) / (maxPageRank - minPageRank) AS normalizedPageRank
19-
,(codeUnit.centralityArticleRank - minArticleRank) / (maxArticleRank - minArticleRank) AS normalizedArticleRank
20-
WITH *
21-
,normalizedPageRank - normalizedArticleRank AS normalizedPageRankToArticleRankDifference
22-
WITH collect(codeUnit) AS codeUnits
23-
,minPageRank, maxPageRank, minArticleRank, maxArticleRank
24-
,percentileDisc(normalizedPageRankToArticleRankDifference, 0.90) AS pageToArticleRankDifferenceThreshold
25-
UNWIND codeUnits AS codeUnit
26-
WITH *
27-
,(codeUnit.centralityPageRank - minPageRank) / (maxPageRank - minPageRank) AS normalizedPageRank
28-
,(codeUnit.centralityArticleRank - minArticleRank) / (maxArticleRank - minArticleRank) AS normalizedArticleRank
29-
WITH *
30-
,normalizedPageRank - normalizedArticleRank AS normalizedPageRankToArticleRankDifference
31-
WHERE normalizedPageRankToArticleRankDifference >= pageToArticleRankDifferenceThreshold
17+
WHERE codeUnit.centralityPageRankToArticleRankDifference >= pageToArticleRankDifferenceThreshold
3218
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
3319
WITH *, artifact.name AS artifactName
3420
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
3521
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
3622
WITH *, coalesce(artifactName, projectName) AS projectName
3723
ORDER BY codeUnit.centralityPageRank DESC, codeUnit.centralityArticleRank ASC
3824
LIMIT 10
39-
WITH collect([codeUnit, projectName, normalizedPageRank, normalizedArticleRank, normalizedPageRankToArticleRankDifference]) AS results
40-
UNWIND range(0, size(results) - 1) AS codeUnitIndex
41-
WITH codeUnitIndex + 1 AS codeUnitIndex
42-
,results[codeUnitIndex][0] AS codeUnit
43-
,results[codeUnitIndex][1] AS projectName
44-
,results[codeUnitIndex][2] AS normalizedPageRank
45-
,results[codeUnitIndex][3] AS normalizedArticleRank
46-
,results[codeUnitIndex][4] AS normalizedPageRankToArticleRankDifference
25+
WITH collect([codeUnit, projectName]) AS results
26+
UNWIND range(0, size(results) - 1) AS codeUnitIndex
27+
WITH codeUnitIndex + 1 AS codeUnitIndex
28+
,results[codeUnitIndex][0] AS codeUnit
29+
,results[codeUnitIndex][1] AS projectName
4730
SET codeUnit:Mark4TopAnomalyAuthority
4831
,codeUnit.anomalyAuthorityRank = codeUnitIndex
4932
RETURN DISTINCT
@@ -52,7 +35,5 @@ OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS
5235
,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
5336
,codeUnit.centralityPageRank AS pageRank
5437
,codeUnit.centralityArticleRank AS articleRank
55-
,codeUnit.anomalyAuthorityRank AS rank
56-
,normalizedPageRank
57-
,normalizedArticleRank
58-
,normalizedPageRankToArticleRankDifference
38+
,codeUnit.centralityPageRankToArticleRankDifference AS normalizedPageRankToArticleRankDifference
39+
,codeUnit.anomalyAuthorityRank AS rank

0 commit comments

Comments
 (0)