Spaces:
Running
Running
Update evaluation/evaluator.py
Browse files- evaluation/evaluator.py +11 -10
evaluation/evaluator.py
CHANGED
@@ -2,8 +2,8 @@ import json
|
|
2 |
from evaluation.evaluate_utils.evaluate_factory import get_evaluator
|
3 |
import numpy as np
|
4 |
|
5 |
-
def find_isnan(samp):
|
6 |
|
|
|
7 |
try:
|
8 |
if np.isnan(samp):
|
9 |
return True
|
@@ -11,10 +11,9 @@ def find_isnan(samp):
|
|
11 |
return False
|
12 |
except:
|
13 |
return False
|
14 |
-
|
15 |
|
16 |
-
def fix_ans(answer):
|
17 |
|
|
|
18 |
try:
|
19 |
answer = answer.replace("{'", '{"').replace("', '", '", "').replace("': '", '": "').replace("'}", '"}')
|
20 |
answer = answer.replace("': ", '": ')
|
@@ -24,7 +23,6 @@ def fix_ans(answer):
|
|
24 |
|
25 |
|
26 |
def parse_answer(answer):
|
27 |
-
|
28 |
if len(answer) == 1:
|
29 |
ans, is_num = fix_number(answer[0])
|
30 |
if is_num:
|
@@ -47,7 +45,6 @@ def parse_answer(answer):
|
|
47 |
|
48 |
|
49 |
def fix_number(number):
|
50 |
-
|
51 |
if type(number) == str:
|
52 |
copy_ans = number
|
53 |
copy_ans = ' '.join(' '.join(' '.join(copy_ans.split('$')).split('%')).split('sqft')).strip()
|
@@ -64,8 +61,8 @@ def fix_number(number):
|
|
64 |
|
65 |
|
66 |
def fix_prediction(prediction, gold_answer, evaluator):
|
67 |
-
|
68 |
-
|
69 |
prediction = fix_number(prediction[0])
|
70 |
|
71 |
if type(prediction) != list:
|
@@ -86,20 +83,24 @@ def fix_prediction(prediction, gold_answer, evaluator):
|
|
86 |
|
87 |
|
88 |
def question_scorer(prediction, gold_answer):
|
89 |
-
|
90 |
try:
|
91 |
prediction = json.loads(prediction)
|
92 |
except:
|
93 |
prediction = prediction
|
94 |
|
95 |
-
answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0] if type(
|
|
|
96 |
gold_answer, evaluator = parse_answer(answer_list)
|
97 |
prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)
|
98 |
|
99 |
has_ans = 1.
|
100 |
if (type(prediction) != float and len(prediction) == 0) or find_isnan(prediction):
|
101 |
has_ans = 0.
|
102 |
-
|
|
|
|
|
|
|
|
|
103 |
if not run_eval:
|
104 |
return 0., has_ans
|
105 |
|
|
|
2 |
from evaluation.evaluate_utils.evaluate_factory import get_evaluator
|
3 |
import numpy as np
|
4 |
|
|
|
5 |
|
6 |
+
def find_isnan(samp):
|
7 |
try:
|
8 |
if np.isnan(samp):
|
9 |
return True
|
|
|
11 |
return False
|
12 |
except:
|
13 |
return False
|
|
|
14 |
|
|
|
15 |
|
16 |
+
def fix_ans(answer):
|
17 |
try:
|
18 |
answer = answer.replace("{'", '{"').replace("', '", '", "').replace("': '", '": "').replace("'}", '"}')
|
19 |
answer = answer.replace("': ", '": ')
|
|
|
23 |
|
24 |
|
25 |
def parse_answer(answer):
|
|
|
26 |
if len(answer) == 1:
|
27 |
ans, is_num = fix_number(answer[0])
|
28 |
if is_num:
|
|
|
45 |
|
46 |
|
47 |
def fix_number(number):
|
|
|
48 |
if type(number) == str:
|
49 |
copy_ans = number
|
50 |
copy_ans = ' '.join(' '.join(' '.join(copy_ans.split('$')).split('%')).split('sqft')).strip()
|
|
|
61 |
|
62 |
|
63 |
def fix_prediction(prediction, gold_answer, evaluator):
|
64 |
+
if type(prediction) == list and len(prediction) == 1 and (
|
65 |
+
type(prediction[0]) == int or ((type(prediction[0]) == str) and prediction[0].isnumeric())):
|
66 |
prediction = fix_number(prediction[0])
|
67 |
|
68 |
if type(prediction) != list:
|
|
|
83 |
|
84 |
|
85 |
def question_scorer(prediction, gold_answer):
|
|
|
86 |
try:
|
87 |
prediction = json.loads(prediction)
|
88 |
except:
|
89 |
prediction = prediction
|
90 |
|
91 |
+
answer_list = [x for x in gold_answer.split("\n") if len(x.strip()) > 0] if type(
|
92 |
+
gold_answer) != list else gold_answer
|
93 |
gold_answer, evaluator = parse_answer(answer_list)
|
94 |
prediction, run_eval = fix_prediction(prediction, gold_answer, evaluator)
|
95 |
|
96 |
has_ans = 1.
|
97 |
if (type(prediction) != float and len(prediction) == 0) or find_isnan(prediction):
|
98 |
has_ans = 0.
|
99 |
+
|
100 |
+
if type(prediction) == list:
|
101 |
+
if all((type(pred) not in {float, int} and len(pred) == 0) or find_isnan(pred) for pred in prediction):
|
102 |
+
has_ans = 0
|
103 |
+
|
104 |
if not run_eval:
|
105 |
return 0., has_ans
|
106 |
|