omidf committed
Commit
eee73fc
1 Parent(s): 63e7fe5

Update compute_score.py

Files changed (1)
  1. compute_score.py +25 -3
compute_score.py CHANGED
@@ -26,6 +26,25 @@ def normalize_answer(s):
 
     return white_space_fix(remove_articles(remove_punc(lower(s))))
 
+def precision_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    precision = 1.0 * num_same / len(prediction_tokens)
+    return precision
+
+def recall_score(prediction, ground_truth):
+    prediction_tokens = normalize_answer(prediction).split()
+    ground_truth_tokens = normalize_answer(ground_truth).split()
+    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+    num_same = sum(common.values())
+    if num_same == 0:
+        return 0
+    recall = 1.0 * num_same / len(ground_truth_tokens)
+    return recall
 
 def f1_score(prediction, ground_truth):
     prediction_tokens = normalize_answer(prediction).split()
@@ -53,7 +72,7 @@ def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
 
 
 def compute_score(dataset, predictions):
-    f1 = exact_match = total = 0
+    precision = recall = f1 = exact_match = total = 0
     for article in dataset:
         for paragraph in article["paragraphs"]:
             for qa in paragraph["qas"]:
@@ -66,11 +85,14 @@ def compute_score(dataset, predictions):
                 prediction = predictions[qa["id"]]
                 exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
                 f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-
+                precision += metric_max_over_ground_truths(precision_score, prediction, ground_truths)
+                recall += metric_max_over_ground_truths(recall_score, prediction, ground_truths)
     exact_match = 100.0 * exact_match / total
     f1 = 100.0 * f1 / total
+    recall = 100.0 * recall / total
+    precision = 100.0 * precision / total
 
-    return {"exact_match": exact_match, "f1": f1}
+    return {"exact_match": exact_match, "f1": f1, "precision": precision, "recall": recall}
 
 
 if __name__ == "__main__":
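
As a quick sanity check of the two new metrics, here is a minimal sketch (not part of the commit) that exercises precision_score and recall_score next to the existing f1_score; it assumes compute_score.py is importable from the working directory.

from compute_score import f1_score, precision_score, recall_score

# normalize_answer drops the article "the", so the comparison is
# ["cat", "sat"] vs. ["cat", "sat", "down"]
prediction = "the cat sat"
ground_truth = "cat sat down"

p = precision_score(prediction, ground_truth)  # 2 shared tokens / 2 predicted tokens -> 1.0
r = recall_score(prediction, ground_truth)     # 2 shared tokens / 3 reference tokens -> 0.667
f = f1_score(prediction, ground_truth)         # harmonic mean 2*p*r/(p+r) -> 0.8

assert abs(f - 2 * p * r / (p + r)) < 1e-9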
 
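And a hypothetical end-to-end call showing the extended return value; the one-article dataset below assumes the standard SQuAD v1.1 layout (articles -> paragraphs -> qas) that compute_score iterates over, with answer texts read from each qa's "answers" list.

from compute_score import compute_score

dataset = [{
    "paragraphs": [{
        "qas": [{
            "id": "q1",
            "answers": [{"text": "cat sat down"}],
        }],
    }],
}]
predictions = {"q1": "the cat sat"}

# With this commit the returned dict carries the two new keys alongside
# the old ones, each scaled to 0-100, e.g.:
# {'exact_match': 0.0, 'f1': 80.0, 'precision': 100.0, 'recall': 66.66...}
print(compute_score(dataset, predictions))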