pminervini committed on
Commit
e5e2b84
1 Parent(s): d374577
src/display/utils.py CHANGED
@@ -44,7 +44,9 @@ class Tasks(Enum):
44
  halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
45
  halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
46
 
47
- selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
 
 
48
 
49
  # These classes are for user facing column names,
50
  # to avoid having to change them all around the code
 
44
  halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
45
  halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
46
 
47
+ # XXX: re-enable the SelfCheckGPT task below at some point
48
+ # selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
49
+
50
 
51
  # These classes are for user facing column names,
52
  # to avoid having to change them all around the code
src/leaderboard/filter_models.py CHANGED
@@ -20,6 +20,7 @@ DO_NOT_SUBMIT_MODELS = [
20
  "Voicelab/trurl-2-13b", # trained on MMLU
21
  ]
22
 
 
23
  def flag_models(leaderboard_data: list[dict]):
24
  for model_data in leaderboard_data:
25
  if model_data["model_name_for_query"] in FLAGGED_MODELS:
 
20
  "Voicelab/trurl-2-13b", # trained on MMLU
21
  ]
22
 
23
+
24
  def flag_models(leaderboard_data: list[dict]):
25
  for model_data in leaderboard_data:
26
  if model_data["model_name_for_query"] in FLAGGED_MODELS:
src/utils.py CHANGED
@@ -19,6 +19,7 @@ def get_dataset_url(row):
19
  benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
20
  return benchmark
21
 
 
22
  def get_dataset_summary_table(file_path):
23
  df = pd.read_csv(file_path)
24
 
 
19
  benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
20
  return benchmark
21
 
22
+
23
  def get_dataset_summary_table(file_path):
24
  df = pd.read_csv(file_path)
25