Spaces:

yqsong
/

execution_accuracy

Sleeping

App Files Files Community

yqsong committed on Nov 23, 2023

Commit

196f8b3

•

1 Parent(s): d27541e

first commit

Browse files

Files changed (2) hide show

execution_accuracy.py +25 -6
requirements.txt +2 -1

execution_accuracy.py CHANGED Viewed

@@ -15,7 +15,7 @@
 import evaluate
 import datasets
 # TODO: Add BibTeX citation
 _CITATION = """\
@@ -71,8 +71,9 @@ class ExecutionAccuracy(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -86,10 +87,28 @@ class ExecutionAccuracy(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
-    def _compute(self, predictions, references):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
         return {
-            "accuracy": accuracy,
         }

 import evaluate
 import datasets
+from records import Database
 # TODO: Add BibTeX citation
 _CITATION = """\
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
+                'predictions': datasets.Value('string'),
+                'references': datasets.Value('string'),
+                'db_urls': datasets.Value('string'),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
         # TODO: Download external resources if needed
         pass
+    def _compute(self, predictions, references, db_urls):
         """Returns the scores"""
         # TODO: Compute the different scores of the module
+        cnt = 0
+        for prediction, reference, db_url in zip(predictions, references, db_urls):
+            db = Database(db_url)
+            try:
+                pred = db.query(predictions).as_dict()
+            except Exception as e:
+                pred = []
+            try:
+                ref = db.query(references).as_dict()
+            except Exception as e:
+                ref = []
+            pred = [tuple(x.values()) for x in pred]
+            ref = [tuple(x.values()) for x in ref]
+            if len(pred) == len(ref):
+                pred.sort(key=lambda x: hash(x))
+                ref.sort(key=lambda x: hash(x))
+                if pred == ref:
+                    cnt += 1
+        accuracy = cnt / len(predictions)
         return {
+            "execution accuracy": accuracy,
         }

requirements.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- git+https://github.com/huggingface/evaluate@main


1	+ git+https://github.com/huggingface/evaluate@main
2	+ records