Commit a88d51c • pminervini committed
1 Parent(s): 9bfc5f2

update

Files changed:
- backend-cli.py  +5 -3
- src/leaderboard/read_evals.py  +4 -3
- submit-cli.py  +1 -1
backend-cli.py CHANGED

@@ -3,6 +3,7 @@
 import os
 import json
 
+import random
 from datetime import datetime
 
 from huggingface_hub import snapshot_download
@@ -99,7 +100,6 @@ def process_finished_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     from src.leaderboard.read_evals import get_raw_eval_results
@@ -115,8 +115,11 @@ def process_finished_requests() -> bool:
     from typing import Optional
     eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
 
+    task_lst = TASKS_HARNESS.copy()
+    random.shuffle(task_lst)
+
     # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
-    for task in TASKS_HARNESS:
+    for task in task_lst:
         task_name = task.benchmark
 
         if eval_result is None or task_name not in eval_result.results:
@@ -145,7 +148,6 @@ def process_pending_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
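The net effect in backend-cli.py is that random is imported once at module level and both the eval-request queue and a copy of the task list are shuffled, so repeated runs are less likely to keep picking the same pending work first. Below is a minimal, self-contained sketch of the new task loop; the Task dataclass, the TASKS_HARNESS list and the results dict here are simplified stand-ins, not the Space's real objects.

# Minimal sketch of the shuffled task loop added in this commit.
# Task, TASKS_HARNESS and results are simplified stand-ins for the Space's own objects.
import random
from dataclasses import dataclass

@dataclass
class Task:
    benchmark: str

TASKS_HARNESS = [Task("arc"), Task("hellaswag"), Task("truthfulqa")]
results = {"arc": 0.71}  # benchmarks we already have results for

task_lst = TASKS_HARNESS.copy()   # copy so the shared list keeps its original order
random.shuffle(task_lst)          # randomise which missing benchmark is evaluated first

for task in task_lst:
    task_name = task.benchmark
    if task_name not in results:
        print(f"Would run the evaluation for {task_name}")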
src/leaderboard/read_evals.py CHANGED

@@ -133,7 +133,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model}")
+            print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path}")
 
     def is_complete(self) -> bool:
         for task in Tasks:
@@ -169,7 +169,7 @@
 
 
 def get_request_file_for_model(requests_path, model_name, precision):
-    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
+    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED and RUNNING"""
    request_files = os.path.join(
        requests_path,
        f"{model_name}_eval_request_*.json",
@@ -179,11 +179,12 @@ def get_request_file_for_model(requests_path, model_name, precision):
    # Select correct request file (precision)
    request_file = ""
    request_files = sorted(request_files, reverse=True)
+    # print('XXX', request_files)
    for tmp_request_file in request_files:
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
            if (
-                req_content["status"] in ["FINISHED"]
+                req_content["status"] in ["FINISHED", "RUNNING"]
                and req_content["precision"] == precision.split(".")[-1]
            ):
                request_file = tmp_request_file
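For context, the selector these hunks touch resolves the most recent request file whose status and precision match; after this commit a RUNNING request is accepted alongside FINISHED, and a failed lookup now also prints the requests path. The sketch below assumes the glob step that the hunks elide (turning the os.path.join pattern into an actual file list); everything else mirrors the lines shown above.

# Hedged sketch of get_request_file_for_model after this commit.
# The glob.glob() call is an assumption: the hunks only show the pattern being built.
import glob
import json
import os

def get_request_file_for_model(requests_path, model_name, precision):
    pattern = os.path.join(requests_path, f"{model_name}_eval_request_*.json")
    request_files = sorted(glob.glob(pattern), reverse=True)

    request_file = ""
    for tmp_request_file in request_files:
        with open(tmp_request_file, "r") as f:
            req_content = json.load(f)
            # FINISHED-only before this commit; RUNNING requests are now accepted too
            if (
                req_content["status"] in ["FINISHED", "RUNNING"]
                and req_content["precision"] == precision.split(".")[-1]
            ):
                request_file = tmp_request_file
    return request_file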
submit-cli.py CHANGED

@@ -118,7 +118,7 @@ def main():
 
     filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
 
-    for i in range(min(
+    for i in range(min(200, len(filtered_model_lst))):
         model = filtered_model_lst[i]
 
         print(f'Considering {model.id} ..')
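The only change here caps how many filtered models a single run walks through: at most the 200 most-downloaded models that pass custom_filter. A small sketch with toy stand-ins for the hub model list (the real model_lst comes from the Hub API and custom_filter is defined elsewhere in submit-cli.py):

# Toy sketch of the capped submission loop; model_lst and custom_filter are placeholders.
from types import SimpleNamespace

model_lst = [
    SimpleNamespace(id="org/model-a", downloads=1200),
    SimpleNamespace(id="org/model-b", downloads=300),
]

def custom_filter(m) -> bool:
    return True  # stand-in for the real filter in submit-cli.py

filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)

# Consider at most the 200 most-downloaded models that pass the filter
for i in range(min(200, len(filtered_model_lst))):
    model = filtered_model_lst[i]
    print(f'Considering {model.id} ..')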