adding ANLS logic
- README.md +6 -0
- anls.py +2 -2
- compute_score.py +32 -33
- requirements.txt +2 -2
README.md
CHANGED
@@ -10,3 +10,9 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Please note that we are considering including other evaluation metrics that are popular in VQA and Reading Comprehension tasks.
+
+Answers are not case sensitive.
+Answers are space sensitive.
+Answers, and the tokens comprising them, are not limited to a fixed-size dictionary; an answer can be any word/token present in the document.
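For context, the score this Space computes follows the standard ANLS definition from the ST-VQA/DocVQA benchmarks (the formula below is a reference restatement, not text from the commit). With N questions, ground-truth answers a_ij for question i, prediction o_i, and threshold tau = 0.5:

$$\mathrm{ANLS} = \frac{1}{N}\sum_{i=1}^{N}\max_{j}\, s(a_{ij}, o_i),\qquad s(a, o) = \begin{cases}1 - \mathrm{NL}(a, o) & \text{if } \mathrm{NL}(a, o) < \tau\\ 0 & \text{otherwise}\end{cases}$$

where NL is the Levenshtein distance normalized by the length of the longer string.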
anls.py
CHANGED
@@ -54,8 +54,8 @@ Args:
 Returns:
     'anls': The ANLS score of predicted tokens versus the gold answer
 Examples:
-    >>> predictions = [{'prediction_text': '
-    >>> references = [{'answers':
+    >>> predictions = [{'prediction_text': 'Denver Broncos', 'question_id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '56e10a3be3433e1400422b22'}]
     >>> anls_metric = evaluate.load("anls")
     >>> results = anls_metric.compute(predictions=predictions, references=references)
     >>> print(results)
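Because matching runs on python-Levenshtein's ratio, here is a quick sketch (not part of the commit) of how the raw similarity behaves on the docstring's answers. Raw ratio is case sensitive, which is why the README's "not case sensitive" guarantee needs an explicit lowercasing step, while space differences deliberately keep their cost:

from Levenshtein import ratio

print(ratio('Denver Broncos', 'Denver R. Broncos'))  # ~0.90, above the 0.5 ANLS threshold
print(ratio('denver broncos', 'Denver Broncos'))     # ~0.86, raw ratio penalizes case
print(ratio('DenverBroncos', 'Denver Broncos'))      # ~0.96, spacing differences also cost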
compute_score.py
CHANGED
@@ -1,38 +1,37 @@
-import sys
-from collections import Counter
 from Levenshtein import ratio
 
 
-def anls_compute(
-    ...
-def compute_score(dataset, predictions):
-    anls_score = total = 0
-    for article in dataset:
-        for paragraph in article["paragraphs"]:
-            for qa in paragraph["qas"]:
-                total += 1
-                if qa["id"] not in predictions:
-                    message = "Unanswered question " + qa["id"] + " will receive score 0."
-                    print(message, file=sys.stderr)
-                    continue
-                ground_truths = list(map(lambda x: x["text"], qa["answers"]))
-                prediction = predictions[qa["id"]]
-                score = anls_compute(prediction=prediction, ground_truth=ground_truths)
-                # exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-                # f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-                #
-                # exact_match = 100.0 * exact_match / total
-                # f1 = 100.0 * f1 / total
+def anls_compute(predictions, ground_truths):
+    theta = 0.5  # ANLS threshold: answers farther than this score 0
+    anls_score = 0
+    for qid, prediction in predictions.items():
+        max_value = 0
+        if qid in ground_truths:
+            for x in ground_truths[qid]:
+                # ratio() is a similarity, so the normalized distance is its complement;
+                # lowercase both sides because the README states answers are not case sensitive
+                nl = 1 - ratio(prediction.lower(), x.lower())
+                if nl < theta:
+                    score = 1 - nl
+                    if score > max_value:
+                        max_value = score
+        anls_score += max_value
+    # average over questions -- the "A" in ANLS
+    return anls_score / len(predictions) if predictions else 0
+
+
+def compute_score(dataset, prediction):
+    ground_truths = {x['question_id']: x['answers'] for x in dataset}
+    predictions = {x['question_id']: x['prediction_text'] for x in prediction}
+    anls_score = anls_compute(predictions=predictions, ground_truths=ground_truths)
     return {"anls_score": anls_score}
+
+
+if __name__ == "__main__":
+    prediction = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
+                  {'question_id': '18601', 'prediction_text': '12/15/89'},
+                  {'question_id': '16734', 'prediction_text': 'Dear dr. Lobo'}]
+
+    dataset = [{"answers": ["Denver Broncos", "Denver R. Broncos"], 'question_id': '10285'},
+               {'answers': ['12/15/88'], 'question_id': '18601'},
+               {'answers': ['Dear Dr. Lobo', 'Dr. Lobo'], 'question_id': '16734'}]
+    anls_score = compute_score(dataset=dataset, prediction=prediction)
+    print(anls_score)
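Hand-checking the __main__ example (a sketch, not part of the commit; values come from python-Levenshtein's ratio, which counts a substitution as two edits):

from Levenshtein import ratio

print(ratio('12/15/89', '12/15/88'))            # 0.875 -> NLS 0.875
print(ratio('dear dr. lobo', 'dear dr. lobo'))  # 1.0 -> exact match after lowercasing
# 'Denver Broncos' is also an exact match (NLS 1.0), so the average printed is ~0.96.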
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
-
-
+evaluate
+python-Levenshtein
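To run the metric locally, something like `pip install evaluate python-Levenshtein` should cover both dependencies; the names above are the standard PyPI package names.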