Viona committed
Commit 29d0f05
1 Parent(s): 6a4fac9

adding ANLS logic

Files changed (4):
  1. README.md +6 -0
  2. anls.py +2 -2
  3. compute_score.py +32 -33
  4. requirements.txt +2 -2
README.md CHANGED
@@ -10,3 +10,9 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Please note that we are considering adding other evaluation metrics that are popular in VQA and Reading Comprehension tasks.
+
+Answers are not case-sensitive.
+Answers are space-sensitive.
+Answers, or the tokens comprising them, are not limited to a fixed-size dictionary; an answer can be any word/token present in the document.
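The notes above describe intended matching behaviour rather than implement it. As a point of reference, a minimal sketch of a comparison consistent with them, assuming both strings are lowercased before scoring (the lowercasing step and the helper name are illustrative, not part of this commit), while whitespace is left untouched so spacing still matters:

from Levenshtein import ratio

def normalized_similarity(prediction, reference):
    # Assumption: lowercase both sides so the comparison ignores case,
    # as stated in the README; whitespace is preserved, so spacing still counts.
    return ratio(prediction.lower(), reference.lower())

print(normalized_similarity("Denver Broncos", "denver broncos"))  # 1.0: case ignored
print(normalized_similarity("DenverBroncos", "Denver Broncos"))   # < 1.0: spaces matter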
anls.py CHANGED
@@ -54,8 +54,8 @@ Args:
 Returns:
     'anls': The ANLS score of predicted tokens versus the gold answer
 Examples:
-    >>> predictions = [{'prediction_text': '1976', 'id': '56e10a3be3433e1400422b22'}]
-    >>> references = [{'answers': {'answer_start': [97], 'text': ['1976']}, 'id': '56e10a3be3433e1400422b22'}]
+    >>> predictions = [{'prediction_text': 'Denver Broncos', 'question_id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '56e10a3be3433e1400422b22'}]
     >>> anls_metric = evaluate.load("anls")
     >>> results = anls_metric.compute(predictions=predictions, references=references)
     >>> print(results)
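The docstring reports a single 'anls' value; the underlying per-question rule (as implemented in compute_score.py below, with threshold 0.5) takes the best normalized Levenshtein similarity between the prediction and any gold answer, zeroing out pairs whose normalized distance reaches the threshold. A compact restatement of that rule, with the function name chosen here for illustration:

from Levenshtein import ratio

def anls_per_question(prediction, answers, theta=0.5):
    # Normalized Levenshtein distance is 1 minus the similarity ratio.
    scores = []
    for answer in answers:
        nl = 1 - ratio(prediction, answer)
        scores.append(1 - nl if nl < theta else 0.0)
    return max(scores) if scores else 0.0

print(anls_per_question("Denver Broncos", ["Denver Broncos", "Denver R. Broncos"]))  # 1.0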
compute_score.py CHANGED
@@ -1,38 +1,37 @@
-import sys
-from collections import Counter
 from Levenshtein import ratio
 
 
-def anls_compute(prediction, ground_truth):
-    prediction_tokens = prediction.split()
-    ground_truth_tokens = ground_truth.split()
-    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
-    num_same = sum(common.values())
-    if num_same == 0:
-        return 0
-    precision = 1.0 * num_same / len(prediction_tokens)
-    recall = 1.0 * num_same / len(ground_truth_tokens)
-    f1 = (2 * precision * recall) / (precision + recall)
-    return f1
-
-
-def compute_score(dataset, predictions):
-    anls_score = total = 0
-    for article in dataset:
-        for paragraph in article["paragraphs"]:
-            for qa in paragraph["qas"]:
-                total += 1
-                if qa["id"] not in predictions:
-                    message = "Unanswered question " + qa["id"] + " will receive score 0."
-                    print(message, file=sys.stderr)
-                    continue
-                ground_truths = list(map(lambda x: x["text"], qa["answers"]))
-                prediction = predictions[qa["id"]]
-                score = anls_compute(prediction=prediction, ground_truth=ground_truths)
-                # exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-                # f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
-                #
-                # exact_match = 100.0 * exact_match / total
-                # f1 = 100.0 * f1 / total
+def anls_compute(predictions, ground_truths):
+    theta = 0.5
+    anls_score = 0
+    for qid, prediction in predictions.items():
+        max_value = 0
+        if qid in ground_truths:
+            for x in ground_truths[qid]:
+                nl = 1 - ratio(prediction, x)
+                if nl < theta:
+                    score = 1 - nl
+                    if score > max_value:
+                        max_value = score
+        anls_score += max_value
 
+    return anls_score
+
+
+def compute_score(dataset, prediction):
+    ground_truths = {x['question_id']: x['answers'] for x in dataset}
+    predictions = {x['question_id']: x['prediction_text'] for x in prediction}
+    anls_score = anls_compute(predictions=predictions, ground_truths=ground_truths)
     return {"anls_score": anls_score}
+
+
+if __name__ == "__main__":
+    prediction = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
+                  {'question_id': '18601', 'prediction_text': '12/15/89'},
+                  {'question_id': '16734', 'prediction_text': 'Dear dr. Lobo'}]
+
+    dataset = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '10285'},
+               {'answers': ['12/15/88'], 'question_id': '18601'},
+               {'answers': ['Dear Dr. Lobo', 'Dr. Lobo'], 'question_id': '16734'}]
+    anls_score = compute_score(dataset=dataset, prediction=prediction)
+    print(anls_score)
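Note that compute_score returns the summed per-question score, while ANLS is conventionally reported as the mean over all questions. A small wrapper sketch, assuming the same input shapes as the __main__ block above (the wrapper itself is illustrative, not part of this commit):

def average_anls(dataset, prediction):
    # Divide the summed per-question score by the number of questions
    # to get the usual ANLS value in [0, 1].
    total = compute_score(dataset=dataset, prediction=prediction)["anls_score"]
    return total / len(prediction) if prediction else 0.0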
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-git+https://github.com/huggingface/evaluate@{COMMIT_PLACEHOLDER}
-git+https://github.com/maxbachmann/python-Levenshtein.git
+evaluate
+python-Levenshtein