-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathClusterizator.py
25 lines (19 loc) · 926 Bytes
/
Clusterizator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation
import numpy as np
import distance
import pandas as pd
def cluster(anomaly_dict, anomaly_list):
    """Cluster anomaly traces by Levenshtein similarity.

    A pairwise similarity matrix is built as the negated Levenshtein
    distance between every pair of traces and passed to
    ``AffinityPropagation`` with ``affinity="precomputed"``.

    Args:
        anomaly_dict: Mapping whose keys become the ``UNIQ_ID`` column.
            Keys are paired with traces purely by position, so the mapping
            is assumed to be ordered the same way as ``anomaly_list``
            (NOTE(review): confirm against the caller).
        anomaly_list: Sequence of traces to cluster.

    Returns:
        A ``pandas.DataFrame`` with columns ``UNIQ_ID``, ``trace`` and
        ``cluster_label``, one row per trace. An empty frame with those
        columns is returned when ``anomaly_list`` is empty.

    Raises:
        IndexError: If ``anomaly_dict`` has fewer keys than there are
            traces in ``anomaly_list``.
    """
    columns = ["UNIQ_ID", "trace", "cluster_label"]

    # Nothing to cluster: skip the fit, which cannot work on an empty matrix.
    if len(anomaly_list) == 0:
        return pd.DataFrame(columns=columns)

    dict_keys = list(anomaly_dict)
    if len(dict_keys) < len(anomaly_list):
        # Rows pair the i-th key with the i-th trace; fail early and clearly
        # instead of after the (expensive) clustering step.
        raise IndexError(
            "anomaly_dict has fewer keys than anomaly_list has traces"
        )

    words = np.asarray(anomaly_list)
    # Affinity propagation expects similarities (larger = closer), hence the
    # negated edit distance.
    lev_similarity = -1 * np.array(
        [[distance.levenshtein(w1, w2) for w1 in words] for w2 in words]
    )
    affprop = AffinityPropagation(affinity="precomputed", damping=0.5)
    affprop.fit(lev_similarity)

    labels = affprop.labels_
    count = len(labels)
    # Build the frame in one step: DataFrame.append was removed in pandas 2.0
    # and re-copied the whole frame on every row anyway.
    return pd.DataFrame(
        {
            "UNIQ_ID": dict_keys[:count],
            "trace": list(anomaly_list)[:count],
            "cluster_label": labels,
        },
        columns=columns,
    )