Viona committed on
Commit 6be1f2a
1 Parent(s): 4beaa0d

correcting metric info

Files changed (2):
  1. anls.py +13 -35
  2. compute_score.py +6 -11
anls.py CHANGED
@@ -41,21 +41,17 @@ _KWARGS_DESCRIPTION = """
 Computes Average Normalized Levenshtein Similarity (ANLS).
 Args:
     predictions: List of question-answers dictionaries with the following key-values:
-        - 'id': id of the question-answer pair as given in the references (see below)
+        - 'question_id': id of the question-answer pair as given in the references (see below)
         - 'prediction_text': the text of the answer
     references: List of question-answers dictionaries with the following key-values:
-        - 'id': id of the question-answer pair (see above),
-        - 'answers': a Dict in the SQuAD dataset format
-            {
-                'text': list of possible texts for the answer, as a list of strings
-                'answer_start': list of start positions for the answer, as a list of ints
-            }
-            Note that answer_start values are not taken into account to compute the metric.
+        - 'question_id': id of the question-answer pair (see above),
+        - 'answers': list of possible texts for the answer, as a list of strings
+
 Returns:
     'anls': The ANLS score of predicted tokens versus the gold answer
 Examples:
     >>> predictions = [{'prediction_text': 'Denver Broncos', 'question_id': '56e10a3be3433e1400422b22'}]
-    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos']}, 'question_id': '56e10a3be3433e1400422b22'}]
+    >>> references = [{'answers': ['Denver Broncos', 'Denver R. Broncos'], 'question_id': '56e10a3be3433e1400422b22'}]
     >>> anls_metric = evaluate.load("anls")
     >>> results = anls_metric.compute(predictions=predictions, references=references)
     >>> print(results)
@@ -72,36 +68,18 @@ class Anls(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
-                    "predictions": {"id": datasets.Value("string"), "prediction_text": datasets.Value("string")},
+                    "predictions": {"question_id": datasets.Value("string"),
+                                    "prediction_text": datasets.Value("string")},
                     "references": {
-                        "id": datasets.Value("string"),
-                        "answers": datasets.features.Sequence(
-                            {
-                                "text": datasets.Value("string"),
-                                "answer_start": datasets.Value("int32"),
-                            }
-                        ),
+                        "question_id": datasets.Value("string"),
+                        "answers": datasets.features.Sequence(datasets.Value("string")),
                     },
                 }
             )
         )
 
     def _compute(self, predictions, references):
-        prediction_dict = {prediction["id"]: prediction["prediction_text"] for prediction in predictions}
-        dataset = [
-            {
-                "paragraphs": [
-                    {
-                        "qas": [
-                            {
-                                "answers": [{"text": answer_text} for answer_text in ref["answers"]["text"]],
-                                "id": ref["id"],
-                            }
-                            for ref in references
-                        ]
-                    }
-                ]
-            }
-        ]
-        score = compute_score(dataset=dataset, predictions=prediction_dict)
-        return score
+        ground_truths = {x['question_id']: x['answers'] for x in references}
+        predictions = {x['question_id']: x['prediction_text'] for x in predictions}
+        anls_score = compute_score(predictions=predictions, ground_truths=ground_truths)
+        return {"anls_score": anls_score}
compute_score.py CHANGED
@@ -1,7 +1,7 @@
 from Levenshtein import ratio
 
 
-def anls_compute(predictions, ground_truths):
+def compute_score(predictions, ground_truths):
     theta = 0.5
     anls_score = 0
     for qid, prediction in predictions.items():
@@ -18,20 +18,15 @@ def anls_compute(predictions, ground_truths):
     return anls_score
 
 
-def compute_score(dataset, prediction):
-    ground_truths = {x['question_id']: x['answers'] for x in dataset}
-    predictions = {x['question_id']: x['prediction_text'] for x in prediction}
-    anls_score = anls_compute(predictions=predictions, ground_truths=ground_truths)
-    return {"anls_score": anls_score}
-
-
 if __name__ == "__main__":
-    prediction = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
+    predictions = [{'question_id': '10285', 'prediction_text': 'Denver Broncos'},
                    {'question_id': '18601', 'prediction_text': '12/15/89'},
                    {'question_id': '16734', 'prediction_text': 'Dear dr. Lobo'}]
 
-    dataset = [{"answers": ["Denver Broncos", "Denver R. Broncos"], 'question_id': '10285'},
+    references = [{"answers": ["Denver Broncos", "Denver R. Broncos"], 'question_id': '10285'},
                   {'answers': ['12/15/88'], 'question_id': '18601'},
                   {'answers': ['Dear Dr. Lobo', 'Dr. Lobo'], 'question_id': '16734'}]
-    anls_score = compute_score(dataset=dataset, prediction=prediction)
+    ground_truths = {x['question_id']: x['answers'] for x in references}
+    predictions = {x['question_id']: x['prediction_text'] for x in predictions}
+    anls_score = compute_score(predictions=predictions, ground_truths=ground_truths)
     print(anls_score)
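The diff hides the unchanged body of the per-question loop (lines 8-17). For orientation, a rough sketch of what an ANLS scorer with the visible theta = 0.5 threshold typically does follows; the function name, case-insensitive comparison, and averaging step are illustrative assumptions, not the repo's exact code.

from Levenshtein import ratio


def anls_sketch(predictions, ground_truths, theta=0.5):
    # predictions: {question_id: prediction_text}
    # ground_truths: {question_id: [gold answer strings]}
    total = 0
    for qid, prediction in predictions.items():
        # Best normalized Levenshtein similarity against any gold answer,
        # compared case-insensitively (assumption).
        best = max(ratio(prediction.lower(), gt.lower()) for gt in ground_truths[qid])
        # Similarities below the theta threshold count as 0 (treated as a wrong answer).
        total += best if best >= theta else 0
    # Average over all questions (assumption: the final score is a mean).
    return total / len(predictions)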