open_llm_leaderboard

Runtime error

Nathan Habib committed on Apr 5

Commit

df0b79f

•

1 Parent(s): d1e81be

commit

Files changed (4) hide show

app.py CHANGED Viewed

@@ -50,21 +50,21 @@ def init_space(full_init: bool = True):
         try:
             print(EVAL_REQUESTS_PATH)
             snapshot_download(
-                repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
             )
         except Exception:
             restart_space()
         try:
             print(DYNAMIC_INFO_PATH)
             snapshot_download(
-                repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
             )
         except Exception:
             restart_space()
         try:
             print(EVAL_RESULTS_PATH)
             snapshot_download(
-                repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
             )
         except Exception:
             restart_space()

         try:
             print(EVAL_REQUESTS_PATH)
             snapshot_download(
+                repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, max_workers=8
             )
         except Exception:
             restart_space()
         try:
             print(DYNAMIC_INFO_PATH)
             snapshot_download(
+                repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, max_workers=8
             )
         except Exception:
             restart_space()
         try:
             print(EVAL_RESULTS_PATH)
             snapshot_download(
+                repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,   max_workers=8
             )
         except Exception:
             restart_space()

src/leaderboard/read_evals.py CHANGED Viewed

@@ -202,6 +202,8 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
             # Hardcoding because of gating problem

         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
+        if eval_result.full_model == "databricks/dbrx-base":
+            print("WE HERE")
         if eval_result.full_model in dynamic_data:
             eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
             # Hardcoding because of gating problem

src/populate.py CHANGED Viewed

@@ -13,9 +13,12 @@ def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,
     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
     filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
@@ -44,7 +47,11 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
-                    data = json.load(fp)
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")

     raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
+    print([data for data in all_data_json if data["model_name_for_query"] == "databricks/dbrx-base"])
     filter_models_flags(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
+    print(df.columns)
+    print(df[df["model_name_for_query"] == "databricks/dbrx-base"])
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
+                    try:
+                        data = json.load(fp)
+                    except json.JSONDecodeError:
+                        print(f"Error reading {file_path}")
+                        continue
                 data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                 data[EvalQueueColumn.revision.name] = data.get("revision", "main")

src/submission/check_validity.py CHANGED Viewed

@@ -150,6 +150,9 @@ def get_model_tags(model_card, model: str):
     if is_merge_from_model_card or is_merge_from_metadata:
         tags.append("merge")
     is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
     is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
     if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
         tags.append("moe")

     if is_merge_from_model_card or is_merge_from_metadata:
         tags.append("merge")
     is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
+    # Hardcoding because of gating problem
+    if model == "Qwen/Qwen1.5-32B":
+        is_moe_from_model_card = False
     is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
     if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
         tags.append("moe")