Viona committed
Commit 29d0f05
1 Parent(s): 6a4fac9

adding ANLS logic

Files changed (4):
  1. README.md +6 -0
  2. anls.py +2 -2
  3. compute_score.py +32 -33
  4. requirements.txt +2 -2
README.md CHANGED
@@ -10,3 +10,9 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Please note that we are considering adding other evaluation metrics that are popular in VQA and Reading Comprehension tasks.
+
+Answers are not case-sensitive.
+Answers are space-sensitive.
+Answers, or the tokens comprising them, are not limited to a fixed-size dictionary; an answer can be any word/token present in the document.
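The notes above describe intended matching behaviour rather than implement it. As a point of reference, a minimal sketch of a comparison consistent with them, assuming both strings are lowercased before scoring (the lowercasing step and the helper name are illustrative, not part of this commit), while whitespace is left untouched so spacing still matters:

from Levenshtein import ratio

def normalized_similarity(prediction, reference):
    # Assumption: lowercase both sides so the comparison ignores case,
    # as stated in the README; whitespace is preserved, so spacing still counts.
    return ratio(prediction.lower(), reference.lower())

print(normalized_similarity("Denver Broncos", "denver broncos"))  # 1.0: case ignored
print(normalized_similarity("DenverBroncos", "Denver Broncos"))   # < 1.0: spaces matter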
anls.py CHANGED
@@ -54,8 +54,8 @@ Args:
 Returns:
     'anls': The ANLS score of predicted tokens versus the gold answer
 Examples:
-    >>> predictions = [{'prediction_text': '1976', 'id': '56e10a3be3433e1400422b22'}]
-    >>> references = [{'answers': {'answer_start': [97], 'text': ['1976']}, 'id': '56e10a3be3433e1400422b22'}]
+    >>> predictions = [{'prediction_text': 'Denver Broncos', 'question_id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '56e10a3be3433e1400422b22'}]
     >>> anls_metric = evaluate.load("anls")
     >>> results = anls_metric.compute(predictions=predictions, references=references)
     >>> print(results)
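The docstring reports a single 'anls' value; the underlying per-question rule (as implemented in compute_score.py below, with threshold 0.5) takes the best normalized Levenshtein similarity between the prediction and any gold answer, zeroing out pairs whose normalized distance reaches the threshold. A compact restatement of that rule, with the function name chosen here for illustration:

from Levenshtein import ratio

def anls_per_question(prediction, answers, theta=0.5):
    # Normalized Levenshtein distance is 1 minus the similarity ratio.
    scores = []
    for answer in answers:
        nl = 1 - ratio(prediction, answer)
        scores.append(1 - nl if nl < theta else 0.0)
    return max(scores) if scores else 0.0

print(anls_per_question("Denver Broncos", ["Denver Broncos", "Denver R. Broncos"]))  # 1.0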
compute_score.py CHANGED
@@ -1,38 +1,37 @@
-import sys
-from collections import Counter
 from Levenshtein import ratio
 
 
-def anls_compute(prediction, ground_truth):
-    prediction_tokens = prediction.split()
-    ground_truth_tokens = ground_truth.split()
-    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
-    num_same = sum(common.values())
-    if num_same == 0:
-        return 0
-    precision = 1.0 * num_same / len(prediction_tokens)
-    recall = 1.0 * num_same / len(ground_truth_tokens)
-    f1 = (2 * precision * recall) / (precision + recall)
-    return f1
-
-
-def compute_score(dataset, predictions):
-    anls_score = total = 0
-    for article in dataset:
-        for paragraph in article["paragraphs"]:
-            for qa in paragraph["qas"]:
-                total += 1
-                if qa["id"] not in predictions:
-                    message = "Unanswered question " + qa["id"] + " will receive score 0."
-                    print(message, file=sys.stderr)
-                    continue
-                ground_truths = list(map(lambda x: x["text"], qa["answers"]))
-                prediction = predictions[qa["id"]]
-                score = anls_compute(prediction=prediction, ground_truth=ground_truths)
-                # exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-                # f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-                #
-                # exact_match = 100.0 * exact_match / total
-                # f1 = 100.0 * f1 / total
+def anls_compute(predictions, ground_truths):
+    theta = 0.5
+    anls_score = 0
+    for qid, prediction in predictions.items():
+        max_value = 0
+        if qid in ground_truths:
+            for x in ground_truths[qid]:
+                nl = 1 - ratio(prediction, x)
+                if nl < theta:
+                    score = 1 - nl
+                    if score > max_value:
+                        max_value = score
+        anls_score += max_value
 
+    return anls_score
+
+
+def compute_score(dataset, prediction):
+    ground_truths = {x['question_id']: x['answers'] for x in dataset}
+    predictions = {x['question_id']: x['prediction_text'] for x in prediction}
+    anls_score = anls_compute(predictions=predictions, ground_truths=ground_truths)
     return {"anls_score": anls_score}
+
+
+if __name__ == "__main__":
+    prediction = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
+                  {'question_id': '18601', 'prediction_text': '12/15/89'},
+                  {'question_id': '16734', 'prediction_text': 'Dear dr. Lobo'}]
+
+    dataset = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '10285'},
+               {'answers': ['12/15/88'], 'question_id': '18601'},
+               {'answers': ['Dear Dr. Lobo', 'Dr. Lobo'], 'question_id': '16734'}]
+    anls_score = compute_score(dataset=dataset, prediction=prediction)
+    print(anls_score)
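Note that compute_score returns the summed per-question score, while ANLS is conventionally reported as the mean over all questions. A small wrapper sketch, assuming the same input shapes as the __main__ block above (the wrapper itself is illustrative, not part of this commit):

def average_anls(dataset, prediction):
    # Divide the summed per-question score by the number of questions
    # to get the usual ANLS value in [0, 1].
    total = compute_score(dataset=dataset, prediction=prediction)["anls_score"]
    return total / len(prediction) if prediction else 0.0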
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-git+https://github.com/huggingface/evaluate@{COMMIT_PLACEHOLDER}
-git+https://github.com/maxbachmann/python-Levenshtein.git
+evaluate
+python-Levenshtein