facat committed on
Commit
9199665
1 Parent(s): 075ef98
Files changed (1) hide show
  1. tasks.py +46 -59
tasks.py CHANGED
@@ -10,6 +10,7 @@ from functools import partial
10
  from .utils import *
11
 
12
  from evaluate import load
 
13
 
14
 
15
  def fake_pipeline(prompts: Iterable[str]) -> list[str]:
@@ -78,12 +79,12 @@ class Task:
78
  + example[self.label_column],
79
  }
80
  )[self.input_column]
81
- few_shot_prompts = "\n".join(shots)
82
 
83
  test_ds = test_ds.map(
84
  lambda example: {
85
  self.input_column: few_shot_prompts
86
- + "\n"
87
  + example[self.input_column],
88
  }
89
  )
@@ -177,6 +178,9 @@ class Metrics:
177
 
178
 
179
  class CMMLU:
 
 
 
180
  def prompt_cmmlu(example, chat=False):
181
  prefix = "以下是一道多项选择题,请从A、B、C和D中选择最合适的答案作为这个问题的答案。\n\n" if chat else "问题:"
182
  prompt = prefix + example["Question"]
@@ -283,31 +287,34 @@ class CMMLU:
283
  "Test": ["computer science"],
284
  }
285
 
286
- finer_categories = (
287
- pd.Series(subcategories) # noqa # type: ignore
288
- .explode()
289
- .reset_index()
290
- .set_index(0)
291
- .groupby(0)
292
- .agg(list)["index"]
293
- .to_dict()
294
- )
295
-
296
  @classmethod
297
  def suite(cls, chat=False):
298
- suite = {}
 
 
 
 
 
 
 
 
 
299
  for k, v in cls.categories.items():
300
  for subject in v:
301
- suite[k] = [
302
- Task(
303
- ("haonan-li/cmmlu", subcategories),
304
- metric_name=("sustech/tlem", "cmmlu"),
305
- input_column="prompt",
306
- label_column="Answer",
307
- prompt=partial(cls.prompt_cmmlu, chat=chat),
308
- )
309
- for subcategories in cls.finer_categories[subject]
310
- ]
 
 
 
 
311
  return suite
312
 
313
 
@@ -390,9 +397,6 @@ class MMLU:
390
  }
391
 
392
  categories = {
393
- "Math": [
394
- "math",
395
- ],
396
  "STEM": [
397
  "physics",
398
  "chemistry",
@@ -409,26 +413,7 @@ class MMLU:
409
  "geography",
410
  "psychology",
411
  ],
412
- "Other": ["other", "business", "health"],
413
- "All": [
414
- "physics",
415
- "chemistry",
416
- "biology",
417
- "computer science",
418
- "math",
419
- "engineering",
420
- "history",
421
- "philosophy",
422
- "law",
423
- "politics",
424
- "culture",
425
- "economics",
426
- "geography",
427
- "psychology",
428
- "other",
429
- "business",
430
- "health",
431
- ],
432
  "Test": ["culture"],
433
  }
434
 
@@ -443,19 +428,21 @@ class MMLU:
443
  .agg(list)["index"]
444
  .to_dict()
445
  )
446
- suite = {}
447
  for k, v in cls.categories.items():
448
  for subject in v:
449
- suite[k] = [
450
- Task(
451
- ("lukaemon/mmlu", subcategories),
452
- metric_name=("sustech/tlem", "mmlu"),
453
- input_column=cls.input_column,
454
- label_column=cls.label_column,
455
- prompt=partial(cls.prompt_mmlu, chat=chat),
456
- few_shot=0 if chat else 5,
457
- few_shot_from="validation",
458
- )
459
- for subcategories in finer_categories[subject]
460
- ]
 
 
461
  return suite
 
10
  from .utils import *
11
 
12
  from evaluate import load
13
+ from collections import defaultdict
14
 
15
 
16
  def fake_pipeline(prompts: Iterable[str]) -> list[str]:
 
79
  + example[self.label_column],
80
  }
81
  )[self.input_column]
82
+ few_shot_prompts = "\n\n".join(shots)
83
 
84
  test_ds = test_ds.map(
85
  lambda example: {
86
  self.input_column: few_shot_prompts
87
+ + "\n\n"
88
  + example[self.input_column],
89
  }
90
  )
 
178
 
179
 
180
  class CMMLU:
181
+ input_column = "prompt"
182
+ label_column = "Answer"
183
+
184
  def prompt_cmmlu(example, chat=False):
185
  prefix = "以下是一道多项选择题,请从A、B、C和D中选择最合适的答案作为这个问题的答案。\n\n" if chat else "问题:"
186
  prompt = prefix + example["Question"]
 
287
  "Test": ["computer science"],
288
  }
289
 
 
 
 
 
 
 
 
 
 
 
290
  @classmethod
291
  def suite(cls, chat=False):
292
+ finer_categories = (
293
+ pd.Series(cls.subcategories) # noqa # type: ignore
294
+ .explode()
295
+ .reset_index()
296
+ .set_index(0)
297
+ .groupby(0)
298
+ .agg(list)["index"]
299
+ .to_dict()
300
+ )
301
+ suite = defaultdict(list)
302
  for k, v in cls.categories.items():
303
  for subject in v:
304
+ suite[k].extend(
305
+ [
306
+ Task(
307
+ ("haonan-li/cmmlu", subcategories),
308
+ metric_name=("sustech/tlem", "cmmlu"),
309
+ input_column=cls.input_column,
310
+ label_column=cls.label_column,
311
+ prompt=partial(cls.prompt_cmmlu, chat=chat),
312
+ few_shot=0 if chat else 5,
313
+ few_shot_from="dev",
314
+ )
315
+ for subcategories in finer_categories[subject]
316
+ ]
317
+ )
318
  return suite
319
 
320
 
 
397
  }
398
 
399
  categories = {
 
 
 
400
  "STEM": [
401
  "physics",
402
  "chemistry",
 
413
  "geography",
414
  "psychology",
415
  ],
416
+ "other": ["other", "business", "health"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  "Test": ["culture"],
418
  }
419
 
 
428
  .agg(list)["index"]
429
  .to_dict()
430
  )
431
+ suite = defaultdict(list)
432
  for k, v in cls.categories.items():
433
  for subject in v:
434
+ suite[k].extend(
435
+ [
436
+ Task(
437
+ ("lukaemon/mmlu", subcategories),
438
+ metric_name=("sustech/tlem", "mmlu"),
439
+ input_column=cls.input_column,
440
+ label_column=cls.label_column,
441
+ prompt=partial(cls.prompt_mmlu, chat=chat),
442
+ few_shot=0 if chat else 5,
443
+ few_shot_from="validation",
444
+ )
445
+ for subcategories in finer_categories[subject]
446
+ ]
447
+ )
448
  return suite