Spaces:
Sleeping
Sleeping
first commit
Browse files- execution_accuracy.py +25 -6
- requirements.txt +2 -1
execution_accuracy.py
CHANGED
@@ -15,7 +15,7 @@
|
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
18 |
-
|
19 |
|
20 |
# TODO: Add BibTeX citation
|
21 |
_CITATION = """\
|
@@ -71,8 +71,9 @@ class ExecutionAccuracy(evaluate.Metric):
|
|
71 |
inputs_description=_KWARGS_DESCRIPTION,
|
72 |
# This defines the format of each prediction and reference
|
73 |
features=datasets.Features({
|
74 |
-
'predictions': datasets.Value('
|
75 |
-
'references': datasets.Value('
|
|
|
76 |
}),
|
77 |
# Homepage of the module for documentation
|
78 |
homepage="http://module.homepage",
|
@@ -86,10 +87,28 @@ class ExecutionAccuracy(evaluate.Metric):
|
|
86 |
# TODO: Download external resources if needed
|
87 |
pass
|
88 |
|
89 |
-
def _compute(self, predictions, references):
|
90 |
"""Returns the scores"""
|
91 |
# TODO: Compute the different scores of the module
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
return {
|
94 |
-
"accuracy": accuracy,
|
95 |
}
|
|
|
15 |
|
16 |
import evaluate
|
17 |
import datasets
|
18 |
+
from records import Database
|
19 |
|
20 |
# TODO: Add BibTeX citation
|
21 |
_CITATION = """\
|
|
|
71 |
inputs_description=_KWARGS_DESCRIPTION,
|
72 |
# This defines the format of each prediction and reference
|
73 |
features=datasets.Features({
|
74 |
+
'predictions': datasets.Value('string'),
|
75 |
+
'references': datasets.Value('string'),
|
76 |
+
'db_urls': datasets.Value('string'),
|
77 |
}),
|
78 |
# Homepage of the module for documentation
|
79 |
homepage="http://module.homepage",
|
|
|
87 |
# TODO: Download external resources if needed
|
88 |
pass
|
89 |
|
90 |
+
def _compute(self, predictions, references, db_urls):
    """Return the execution accuracy of predicted queries.

    Each prediction and its reference are executed against the database
    identified by the matching entry of ``db_urls``. A prediction counts as
    correct when both queries execute and return the same multiset of rows
    (row order is ignored, duplicate rows are significant).

    Args:
        predictions: Predicted query strings, one per example.
        references: Reference query strings, aligned with ``predictions``.
        db_urls: Database URLs, aligned with ``predictions``; each is passed
            to ``Database``.

    Returns:
        A dict with the single key ``"execution accuracy"``: the number of
        matching examples divided by ``len(predictions)``, or ``0.0`` when
        ``predictions`` is empty.
    """
    # Local import so this block does not depend on the file-level imports.
    from collections import Counter

    def fetch_rows(db, sql):
        """Run ``sql`` on ``db``; return rows as tuples, or None on failure."""
        try:
            return [tuple(row.values()) for row in db.query(sql).as_dict()]
        except Exception:
            # Deliberately broad: an arbitrary query can fail in many
            # driver-specific ways, and any failure means "no usable result".
            return None

    total = len(predictions)
    if not total:
        # Avoid ZeroDivisionError on an empty batch.
        return {
            "execution accuracy": 0.0,
        }

    matches = 0
    for prediction, reference, db_url in zip(predictions, references, db_urls):
        db = Database(db_url)
        try:
            # Run the individual queries, not the whole batch lists.
            pred_rows = fetch_rows(db, prediction)
            ref_rows = fetch_rows(db, reference)
        finally:
            # One connection is opened per example; release it right away.
            db.close()

        # A query that failed to execute can never be a match; otherwise two
        # failures would compare equal and be scored as correct.
        if pred_rows is None or ref_rows is None:
            continue

        # Order-insensitive multiset comparison. Unlike sorting by hash(),
        # this is not affected by hash collisions between distinct rows.
        if Counter(pred_rows) == Counter(ref_rows):
            matches += 1

    accuracy = matches / total
    return {
        "execution accuracy": accuracy,
    }
|
requirements.txt
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
git+https://github.com/huggingface/evaluate@main
|
|
|
|
1 |
+
git+https://github.com/huggingface/evaluate@main
|
2 |
+
records
|