Spaces:
Running
Running
add gsm8k
Browse files
tlem.py
CHANGED
@@ -77,6 +77,14 @@ class ReasoningMetric(evaluate.Metric):
|
|
77 |
return results
|
78 |
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
class Suite(EvaluationSuite):
|
81 |
def run(
|
82 |
self,
|
@@ -110,6 +118,8 @@ class Suite(EvaluationSuite):
|
|
110 |
suite = MMLU.suite(chat=chat)
|
111 |
case _ if name.startswith("cmmlu"):
|
112 |
suite = CMMLU.suite(chat=chat)
|
|
|
|
|
113 |
match name:
|
114 |
case _ if "test" in name:
|
115 |
suite = suite["Test"]
|
@@ -120,20 +130,7 @@ class Suite(EvaluationSuite):
|
|
120 |
super().__init__(name)
|
121 |
self.cached_result = {}
|
122 |
|
123 |
-
match self.name:
|
124 |
-
case "cmmlu":
|
125 |
-
pass
|
126 |
-
|
127 |
self.suite = [
|
128 |
-
Task(
|
129 |
-
dataset_name=("gsm8k", "main"),
|
130 |
-
metric_name=("sustech/tlem", "gsm8k"),
|
131 |
-
input_column="question",
|
132 |
-
label_column="answer",
|
133 |
-
)
|
134 |
# TASK_REGISTRY["gsm8k"],
|
135 |
# TASK_REGISTRY["competition_math"],
|
136 |
]
|
137 |
-
|
138 |
-
|
139 |
-
# %%
|
|
|
77 |
return results
|
78 |
|
79 |
|
80 |
+
gsm8k = Task(
|
81 |
+
dataset_name=("gsm8k", "main"),
|
82 |
+
metric_name=("sustech/tlem", "gsm8k"),
|
83 |
+
input_column="question",
|
84 |
+
label_column="answer",
|
85 |
+
)
|
86 |
+
|
87 |
+
|
88 |
class Suite(EvaluationSuite):
|
89 |
def run(
|
90 |
self,
|
|
|
118 |
suite = MMLU.suite(chat=chat)
|
119 |
case _ if name.startswith("cmmlu"):
|
120 |
suite = CMMLU.suite(chat=chat)
|
121 |
+
case "gsm8k":
|
122 |
+
suite = [gsm8k]
|
123 |
match name:
|
124 |
case _ if "test" in name:
|
125 |
suite = suite["Test"]
|
|
|
130 |
super().__init__(name)
|
131 |
self.cached_result = {}
|
132 |
|
|
|
|
|
|
|
|
|
133 |
self.suite = [
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
# TASK_REGISTRY["gsm8k"],
|
135 |
# TASK_REGISTRY["competition_math"],
|
136 |
]
|
|
|
|
|
|