bhys committed on
Commit
8d66a77
1 Parent(s): 541b69c

Update scorer.py

Browse files
Files changed (1) hide show
  1. scorer.py +70 -0
scorer.py CHANGED
@@ -18,6 +18,25 @@ def normalize_number_str(number_str: str) -> float:
18
  return float("inf")
19
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def split_string(
22
  s: str,
23
  char_list: list[str] = [",", ";"],
@@ -37,6 +56,57 @@ def question_scorer(
37
  except ValueError:
38
  return False
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  score = 0
41
  if user_task["final_answer"] == val["Final answer"]:
42
  score = val["Total score"]
 
18
  return float("inf")
19
 
20
 
21
def normalize_answer(a):
    """Return *a* as a lower-cased string without surrounding whitespace.

    Args:
        a: The answer to normalize. A list is concatenated without a
            separator before normalizing. ``None`` is treated as an empty
            answer, and any other non-string value is converted with
            ``str()``.

    Returns:
        The normalized string.
    """
    # A missing answer compares as empty instead of raising AttributeError.
    if a is None:
        return ""
    if isinstance(a, list):
        # Stringify each item so numeric or missing entries cannot break join().
        a = "".join("" if part is None else str(part) for part in a)
    elif not isinstance(a, str):
        a = str(a)
    return a.strip().lower()
30
+
31
+
32
def exact_match(answer, ground_truth):
    """Report whether *answer* equals *ground_truth* after normalization.

    Both values are passed through ``normalize_answer`` before the
    comparison.
    """
    normalized_answer = normalize_answer(answer)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_answer == normalized_truth
34
+
35
+
36
def keyword_match(answer, ground_truth):
    """Report whether the normalized *ground_truth* occurs inside *answer*.

    Both values are passed through ``normalize_answer`` first.

    Args:
        answer: The submitted answer to search in.
        ground_truth: The expected keyword to look for.

    Returns:
        ``True`` when the normalized keyword is non-empty and is a substring
        of the normalized answer, otherwise ``False``.
    """
    keyword = normalize_answer(ground_truth)
    # An empty string is a substring of every string, so an empty keyword
    # would otherwise mark any answer as correct.
    if not keyword:
        return False
    return keyword in normalize_answer(answer)
38
+
39
+
40
  def split_string(
41
  s: str,
42
  char_list: list[str] = [",", ";"],
 
56
  except ValueError:
57
  return False
58
 
59
+ # 打分机制
60
+ level = 0
61
+ expertise = 0
62
+ reasoning = 0
63
+ comprehension = 0
64
+
65
+ final_answer = user_task["final_answer"]
66
+ expected_answer = val["Final answer"]
67
+
68
+ data = val["score"]
69
+ chat_score = []
70
+ for i in range(len(data['type'])):
71
+ item = {
72
+ 'type': data['type'][i],
73
+ 'question': data['question'][i],
74
+ 'choices': data['choices'][i],
75
+ 'answer': data['answer'][i],
76
+ 'expertise': data['expertise'][i],
77
+ 'reasoning': data['reasoning'][i],
78
+ 'comprehension': data['comprehension'][i],
79
+ 'score': data['score'][i]
80
+ }
81
+ chat_score.append(item)
82
+
83
+ for i, score_item in enumerate(chat_score):
84
+ answer_true = False
85
+ if score_item['type'].lower() == 'multiple choice':
86
+ if exact_match(user_task["score_answer"][i], score_item['answer']):
87
+ answer_true = True
88
+ elif score_item['type'].lower() == 'fill in the blanks':
89
+ if keyword_match(user_task["score_answer"][i], score_item['answer']):
90
+ answer_true = True
91
+ elif score_item['type'].lower() == 'short answer questions':
92
+ for ground_truth in score_item['answer']:
93
+ if keyword_match(user_task["score_answer"][i], ground_truth):
94
+ answer_true = True
95
+ break
96
+ # print(answer, score_item['answer'], answer_true)
97
+ # 加分
98
+ if answer_true:
99
+ expertise += score_item['expertise']
100
+ reasoning += score_item['reasoning']
101
+ comprehension += score_item['comprehension']
102
+ if score_item['score'] > level:
103
+ level = score_item['score']
104
+ print([level, expertise, reasoning, comprehension])
105
+ # final_answer正确 则满分,但是能力分不加了
106
+ if expected_answer and exact_match(final_answer, expected_answer):
107
+ level = 10
108
+ return [level, expertise, reasoning, comprehension]
109
+
110
  score = 0
111
  if user_task["final_answer"] == val["Final answer"]:
112
  score = val["Total score"]