Viona committed on
Commit 6be1f2a
1 Parent(s): 4beaa0d

correcting metric info

Files changed (2):
  1. anls.py +13 -35
  2. compute_score.py +6 -11
anls.py CHANGED
@@ -41,21 +41,17 @@ _KWARGS_DESCRIPTION = """
 Computes Average Normalized Levenshtein Similarity (ANLS).
 Args:
     predictions: List of question-answers dictionaries with the following key-values:
-        - 'id': id of the question-answer pair as given in the references (see below)
+        - 'question_id': id of the question-answer pair as given in the references (see below)
         - 'prediction_text': the text of the answer
     references: List of question-answers dictionaries with the following key-values:
-        - 'id': id of the question-answer pair (see above),
-        - 'answers': a Dict in the SQuAD dataset format
-            {
-                'text': list of possible texts for the answer, as a list of strings
-                'answer_start': list of start positions for the answer, as a list of ints
-            }
-            Note that answer_start values are not taken into account to compute the metric.
+        - 'question_id': id of the question-answer pair (see above),
+        - 'answers': list of possible texts for the answer, as a list of strings
+
 Returns:
     'anls': The ANLS score of predicted tokens versus the gold answer
 Examples:
     >>> predictions = [{'prediction_text': 'Denver Broncos', 'question_id': '56e10a3be3433e1400422b22'}]
-    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos']}, 'question_id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '56e10a3be3433e1400422b22'}]
     >>> anls_metric = evaluate.load("anls")
     >>> results = anls_metric.compute(predictions=predictions, references=references)
     >>> print(results)
@@ -72,36 +68,18 @@ class Anls(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
-                    "predictions": {"id": datasets.Value("string"), "prediction_text": datasets.Value("string")},
+                    "predictions": {"question_id": datasets.Value("string"),
+                                    "prediction_text": datasets.Value("string")},
                     "references": {
-                        "id": datasets.Value("string"),
-                        "answers": datasets.features.Sequence(
-                            {
-                                "text": datasets.Value("string"),
-                                "answer_start": datasets.Value("int32"),
-                            }
-                        ),
+                        "question_id": datasets.Value("string"),
+                        "answers": datasets.features.Sequence(datasets.Value("string")),
                     },
                 }
             )
         )
 
     def _compute(self, predictions, references):
-        prediction_dict = {prediction["id"]: prediction["prediction_text"] for prediction in predictions}
-        dataset = [
-            {
-                "paragraphs": [
-                    {
-                        "qas": [
-                            {
-                                "answers": [{"text": answer_text} for answer_text in ref["answers"]["text"]],
-                                "id": ref["id"],
-                            }
-                            for ref in references
-                        ]
-                    }
-                ]
-            }
-        ]
-        score = compute_score(dataset=dataset, predictions=prediction_dict)
-        return score
+        ground_truths = {x['question_id']: x['answers'] for x in references}
+        predictions = {x['question_id']: x['prediction_text'] for x in predictions}
+        anls_score = compute_score(predictions=predictions, ground_truths=ground_truths)
+        return {"anls_score": anls_score}
compute_score.py CHANGED
@@ -1,7 +1,7 @@
 from Levenshtein import ratio
 
 
-def anls_compute(predictions, ground_truths):
+def compute_score(predictions, ground_truths):
     theta = 0.5
     anls_score = 0
     for qid, prediction in predictions.items():
@@ -18,20 +18,15 @@ def anls_compute(predictions, ground_truths):
     return anls_score
 
 
-def compute_score(dataset, prediction):
-    ground_truths = {x['question_id']: x['answers'] for x in dataset}
-    predictions = {x['question_id']: x['prediction_text'] for x in prediction}
-    anls_score = anls_compute(predictions=predictions, ground_truths=ground_truths)
-    return {"anls_score": anls_score}
-
-
 if __name__ == "__main__":
-    prediction = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
+    predictions = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
                    {'question_id': '18601', 'prediction_text': '12/15/89'},
                    {'question_id': '16734', 'prediction_text': 'Dear dr. Lobo'}]
 
-    dataset = [{"answers": ["Denver Broncos", "Denver R. Broncos"], 'question_id': '10285'},
+    references = [{"answers": ["Denver Broncos", "Denver R. Broncos"], 'question_id': '10285'},
                   {'answers': ['12/15/88'], 'question_id': '18601'},
                   {'answers': ['Dear Dr. Lobo', 'Dr. Lobo'], 'question_id': '16734'}]
-    anls_score = compute_score(dataset=dataset, prediction=prediction)
+    ground_truths = {x['question_id']: x['answers'] for x in references}
+    predictions = {x['question_id']: x['prediction_text'] for x in predictions}
+    anls_score = compute_score(predictions=predictions, ground_truths=ground_truths)
     print(anls_score)
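The diff hides the unchanged body of the per-question loop (lines 8-17). For orientation, a rough sketch of what an ANLS scorer with the visible theta = 0.5 threshold typically does follows; the function name, case-insensitive comparison, and averaging step are illustrative assumptions, not the repo's exact code.

from Levenshtein import ratio


def anls_sketch(predictions, ground_truths, theta=0.5):
    # predictions: {question_id: prediction_text}
    # ground_truths: {question_id: [gold answer strings]}
    total = 0
    for qid, prediction in predictions.items():
        # Best normalized Levenshtein similarity against any gold answer,
        # compared case-insensitively (assumption).
        best = max(ratio(prediction.lower(), gt.lower()) for gt in ground_truths[qid])
        # Similarities below the theta threshold count as 0 (treated as a wrong answer).
        total += best if best >= theta else 0
    # Average over all questions (assumption: the final score is a mean).
    return total / len(predictions)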