open_pl_llm_leaderboard

Runtime error

App Files Files Community

djstrong committed on Jul 3, 2024

Commit

ce8c09b

1 Parent(s): 1535574

output to json

Browse files

Files changed (2) hide show

app.py +9 -4
src/populate.py +3 -2

app.py CHANGED Viewed

@@ -43,14 +43,14 @@ def launch_backend():
     _ = subprocess.run(["python", "main_backend.py"])
 try:
-    print(EVAL_REQUESTS_PATH)
     snapshot_download(
         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
 except Exception:
     restart_space()
 try:
-    print(EVAL_RESULTS_PATH)
     snapshot_download(
         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
@@ -162,8 +162,8 @@ def filter_models(
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
-    print(df[AutoEvalColumn.n_shot.name])
-    print(nshot_query)
     filtered_df = filtered_df.loc[df[AutoEvalColumn.n_shot.name].isin(nshot_query + ["None"])]
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
@@ -403,12 +403,17 @@ with demo:
                 show_copy_button=True,
             )
         csv = gr.File(interactive=False, value="output.csv", visible=False)
         def update_visibility(radio):
             return gr.File(interactive=False, value="output.csv", visible=True)
         deleted_models_visibility.change(update_visibility, deleted_models_visibility, csv)
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)

     _ = subprocess.run(["python", "main_backend.py"])
 try:
+    # print(EVAL_REQUESTS_PATH)
     snapshot_download(
         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
 except Exception:
     restart_space()
 try:
+    # print(EVAL_RESULTS_PATH)
     snapshot_download(
         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
+    # print(df[AutoEvalColumn.n_shot.name])
+    # print(nshot_query)
     filtered_df = filtered_df.loc[df[AutoEvalColumn.n_shot.name].isin(nshot_query + ["None"])]
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
                 show_copy_button=True,
             )
         csv = gr.File(interactive=False, value="output.csv", visible=False)
+        json = gr.File(interactive=False, value="all_data.json", visible=False)
         def update_visibility(radio):
             return gr.File(interactive=False, value="output.csv", visible=True)
+        def update_visibility_json(radio):
+            return gr.File(interactive=False, value="all_data.json", visible=True)
         deleted_models_visibility.change(update_visibility, deleted_models_visibility, csv)
+        deleted_models_visibility.change(update_visibility_json, deleted_models_visibility, json)
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)

src/populate.py CHANGED Viewed

@@ -12,13 +12,14 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     metadata=json.load(open(f"{requests_path}/metadata.json"))
     raw_data = get_raw_eval_results(results_path, requests_path, metadata)
     all_data_json = [v.to_dict() for v in raw_data]
-    print(all_data_json)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced
-    df2 = df[has_no_nan_values(df, benchmark_cols)]
     return raw_data, df

     metadata=json.load(open(f"{requests_path}/metadata.json"))
     raw_data = get_raw_eval_results(results_path, requests_path, metadata)
     all_data_json = [v.to_dict() for v in raw_data]
+    # print(all_data_json)
+    json.dump(all_data_json, open("all_data.json", "w"), indent=2, ensure_ascii=False)
     df = pd.DataFrame.from_records(all_data_json)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
     # filter out if any of the benchmarks have not been produced
+    #df2 = df[has_no_nan_values(df, benchmark_cols)]
     return raw_data, df