-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathphrase_extraction_evaluation.py
125 lines (110 loc) · 4.3 KB
/
phrase_extraction_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# phrase_extraction_evaluation.py
import numpy as np
from embedding import Embedder
from typing import List
class STSEvaluation:
    """
    Embedding-based metrics for comparing predicted phrases with reference labels.

    All methods are static; the class only serves as a namespace. Similarity
    matrices used here have one row per prediction and one column per label.
    Metrics whose denominator would be zero (no labels, no predictions, no
    prediction pairs, no phrases) return 0.0 instead of raising.
    """

    # Minimum cosine similarity for two embeddings to count as a match.
    # NOTE: the attribute name is misspelled; it is kept unchanged so that
    # external code reading or overriding `STSEvaluation.treshhold` keeps working.
    treshhold = 0.75

    @staticmethod
    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        """
        Calculate the cosine similarity between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            float: The cosine similarity score, or 0.0 if either vector has
            zero magnitude.
        """
        a_magnitude = np.linalg.norm(a)
        b_magnitude = np.linalg.norm(b)
        # A zero vector has no direction; define its similarity as 0.0
        # rather than dividing by zero.
        if a_magnitude == 0 or b_magnitude == 0:
            return 0.0
        return float(np.dot(a, b) / (a_magnitude * b_magnitude))

    @staticmethod
    def make_similarity_matrix(predictions: List[np.ndarray], labels: List[np.ndarray]) -> np.ndarray:
        """
        Create a similarity matrix between predictions and labels.

        Args:
            predictions (List[np.ndarray]): The prediction embeddings.
            labels (List[np.ndarray]): The label embeddings.

        Returns:
            np.ndarray: Matrix of shape (len(predictions), len(labels)) whose
            entry [i][j] is the cosine similarity of prediction i and label j.
        """
        predictions = list(predictions)
        labels = list(labels)
        rows = [
            [STSEvaluation.cosine_similarity(prediction, label) for label in labels]
            for prediction in predictions
        ]
        # The explicit reshape keeps the result 2-D even when one side is
        # empty (np.array([]) alone would collapse to shape (0,)).
        return np.array(rows, dtype=float).reshape(len(predictions), len(labels))

    @staticmethod
    def average_cosine_score(similarity_matrix: np.ndarray) -> float:
        """
        Calculate the average cosine similarity score.

        For every label (column) the best score over all predictions (rows)
        is taken; the result is the mean of these best scores.

        Args:
            similarity_matrix (np.ndarray): The similarity matrix.

        Returns:
            float: The average best score per label, or 0.0 if the matrix has
            no predictions or no labels.
        """
        matrix = np.asarray(similarity_matrix)
        if matrix.size == 0:
            return 0.0
        return float(matrix.max(axis=0).mean())

    @staticmethod
    def matched_labels(similarity_matrix: np.ndarray) -> float:
        """
        Calculate the ratio of matched labels.

        A label (column) counts as matched when at least one prediction (row)
        reaches the class threshold.

        Args:
            similarity_matrix (np.ndarray): The similarity matrix.

        Returns:
            float: The ratio of matched labels, or 0.0 if the matrix has no
            predictions or no labels.
        """
        matrix = np.asarray(similarity_matrix)
        if matrix.size == 0:
            return 0.0
        matched = (matrix >= STSEvaluation.treshhold).any(axis=0)
        return float(matched.mean())

    @staticmethod
    def redundancy(predictions: List[np.ndarray]) -> float:
        """
        Calculate the redundancy of the predictions.

        Redundancy is the share of unordered prediction pairs whose cosine
        similarity reaches the class threshold.

        Args:
            predictions (List[np.ndarray]): The prediction embeddings.

        Returns:
            float: The redundancy score, or 0.0 when fewer than two
            predictions are given (there are no pairs to compare).
        """
        embeddings = list(predictions)
        count = len(embeddings)
        if count < 2:
            return 0.0
        pair_total = count * (count - 1) // 2
        match_count = sum(
            1
            for i in range(count)
            for j in range(i + 1, count)
            if STSEvaluation.cosine_similarity(embeddings[i], embeddings[j]) >= STSEvaluation.treshhold
        )
        return match_count / pair_total

    @staticmethod
    def groundness(text: str, phrases: List[str]) -> float:
        """
        Calculate the groundness of the predictions.

        A phrase is grounded when it occurs in the text as a case-insensitive
        substring.

        Args:
            text (str): The input text.
            phrases (List[str]): The predicted phrases.

        Returns:
            float: The ratio of grounded phrases, or 0.0 if no phrases are given.
        """
        if not phrases:
            return 0.0
        lowered_text = text.lower()
        grounded = sum(1 for phrase in phrases if phrase.lower() in lowered_text)
        return grounded / len(phrases)

    @staticmethod
    def evaluate_phrases(predictions: List[str], labels: List[str], text: str) -> dict:
        """
        Evaluate the predicted phrases against the labels.

        Args:
            predictions (List[str]): The predicted phrases.
            labels (List[str]): The correct labels.
            text (str): The input text.

        Returns:
            dict: The evaluation metrics under the keys "cosine", "matchings",
            "redundancy" and "groundness".
        """
        # NOTE(review): assumes Embedder.get_embedding returns one embedding
        # vector per input phrase, in input order - confirm against Embedder.
        emb_predictions = Embedder.get_embedding(predictions)
        emb_labels = Embedder.get_embedding(labels)
        similarity_matrix = STSEvaluation.make_similarity_matrix(emb_predictions, emb_labels)
        results = {
            "cosine": STSEvaluation.average_cosine_score(similarity_matrix),
            "matchings": STSEvaluation.matched_labels(similarity_matrix),
            "redundancy": STSEvaluation.redundancy(emb_predictions),
            "groundness": STSEvaluation.groundness(text, predictions),
        }
        return results