open_pl_llm_leaderboard

Running on CPU Upgrade

djstrong committed on Mar 26

Commit

91765f5

•

1 Parent(s): 39d6a74

show perplexity for 5-shot too

Files changed (1) hide show

src/leaderboard/read_evals.py CHANGED Viewed

@@ -95,9 +95,13 @@ class EvalResult:
         for task in Tasks:
             task = task.value
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
-                             task.benchmark == k and n_shot.get(k, -1) == n_shot_num])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue

         for task in Tasks:
             task = task.value
+            task_n_shot_num = n_shot_num
+            if 'perplexity' in task.metric: # perplexity is the same for 0-shot and 5-shot and is calculated only with 0-shot
+                task_n_shot_num = 0
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
+                             task.benchmark == k and n_shot.get(k, -1) == task_n_shot_num])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue