Sean Cho committed on
Commit
150c99b
1 Parent(s): f1b022c

update evaluation fields

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. src/display_models/read_results.py +6 -10
app.py CHANGED
@@ -77,6 +77,8 @@ BENCHMARK_COLS = [
77
  AutoEvalColumn.hellaswag,
78
  AutoEvalColumn.mmlu,
79
  AutoEvalColumn.truthfulqa,
 
 
80
  ]
81
  ]
82
 
 
77
  AutoEvalColumn.hellaswag,
78
  AutoEvalColumn.mmlu,
79
  AutoEvalColumn.truthfulqa,
80
+ AutoEvalColumn.commongen,
81
+ AutoEvalColumn.ethicalverification,
82
  ]
83
  ]
84
 
src/display_models/read_results.py CHANGED
@@ -9,13 +9,13 @@ import numpy as np
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
- BENCHMARKS = ["arc:challenge", "hellaswag", "hendrycksTest", "truthfulqa:mc", "commongen", "ethicalverification"]
13
  BENCH_TO_NAME = {
14
- "arc:challenge": AutoEvalColumn.arc.name,
15
- "hellaswag": AutoEvalColumn.hellaswag.name,
16
- "hendrycksTest": AutoEvalColumn.mmlu.name,
17
- "truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
18
- "commongen": AutoEvalColumn.commongen.name,
19
  "ethicalverification": AutoEvalColumn.ethicalverification.name,
20
  }
21
 
@@ -66,10 +66,6 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
66
  with open(json_filepath) as fp:
67
  data = json.load(fp)
68
 
69
- for mmlu_k in ["harness|hendrycksTest-abstract_algebra|5", "hendrycksTest-abstract_algebra"]:
70
- if mmlu_k in data["versions"] and data["versions"][mmlu_k] == 0:
71
- return None, [] # we skip models with the wrong version
72
-
73
  try:
74
  config = data["config"]
75
  except KeyError:
 
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
+ BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa:mc", "ko_commongen", "ethicalverification"]
13
  BENCH_TO_NAME = {
14
+ "ko_arc_challenge": AutoEvalColumn.arc.name,
15
+ "ko_hellaswag": AutoEvalColumn.hellaswag.name,
16
+ "ko_mmlu": AutoEvalColumn.mmlu.name,
17
+ "ko_truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
18
+ "ko_commongen": AutoEvalColumn.commongen.name,
19
  "ethicalverification": AutoEvalColumn.ethicalverification.name,
20
  }
21
 
 
66
  with open(json_filepath) as fp:
67
  data = json.load(fp)
68
 
 
 
 
 
69
  try:
70
  config = data["config"]
71
  except KeyError: