Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

1023

alozowski committed on Oct 24

Commit

34f418b

•

1 Parent(s): 37b898a

Improved leaderboard update [wip]

Browse files

Files changed (2) hide show

app.py +18 -50
src/leaderboard/data.py +79 -0

app.py CHANGED Viewed

@@ -44,6 +44,7 @@ from src.envs import (
     HF_HOME,
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
 from src.voting.vote_system import VoteManager, run_scheduler
@@ -59,13 +60,17 @@ DO_FULL_INIT = True # os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
 NEW_DATA_ON_LEADERBOARD = True
 LEADERBOARD_DF = None
 def restart_space():
     logging.info(f"Restarting space with repo ID: {REPO_ID}")
     try:
         # Check if new data is pending and download if necessary
         if NEW_DATA_ON_LEADERBOARD:
             logging.info("Fetching latest leaderboard data before restart.")
-            get_latest_data_leaderboard()
         # Now restart the space
         API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -109,37 +114,6 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-def get_latest_data_leaderboard(leaderboard_initial_df=None):
-    global NEW_DATA_ON_LEADERBOARD
-    global LEADERBOARD_DF
-    if NEW_DATA_ON_LEADERBOARD:
-        logging.info("Leaderboard updated at reload!")
-        try:
-            leaderboard_dataset = datasets.load_dataset(
-                AGGREGATED_REPO,
-                "default",
-                split="train",
-                cache_dir=HF_HOME,
-                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # Always download fresh data
-                verification_mode="no_checks"
-            )
-            LEADERBOARD_DF = get_leaderboard_df(
-                leaderboard_dataset=leaderboard_dataset,
-                cols=COLS,
-                benchmark_cols=BENCHMARK_COLS,
-            )
-            logging.info("Leaderboard dataset successfully downloaded.")
-        except Exception as e:
-            logging.error(f"Failed to download leaderboard dataset: {e}")
-            return
-        # Reset the flag after successful download
-        NEW_DATA_ON_LEADERBOARD = False
-    else:
-        LEADERBOARD_DF = leaderboard_initial_df
-        logging.info("Using cached leaderboard dataset.")
-    return LEADERBOARD_DF
 def get_latest_data_queue():
     eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
@@ -147,8 +121,7 @@ def get_latest_data_queue():
 def init_space():
     """Initializes the application space, loading only necessary data."""
-    global NEW_DATA_ON_LEADERBOARD
-    NEW_DATA_ON_LEADERBOARD = True  # Ensure new data is always pulled on restart
     if DO_FULL_INIT:
         # These downloads only occur on full initialization
@@ -158,18 +131,14 @@ def init_space():
         except Exception:
             restart_space()
-    # Always redownload the leaderboard DataFrame
-    global LEADERBOARD_DF
-    LEADERBOARD_DF = get_latest_data_leaderboard()
     # Evaluation queue DataFrame retrieval is independent of initialization detail level
     eval_queue_dfs = get_latest_data_queue()
-    return LEADERBOARD_DF, eval_queue_dfs
 # Initialize VoteManager
 vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO)
 # Schedule the upload_votes method to run every 15 minutes
 schedule.every(15).minutes.do(vote_manager.upload_votes)
@@ -180,10 +149,11 @@ scheduler_thread.start()
 # Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
 # This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-LEADERBOARD_DF, eval_queue_dfs = init_space()
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
 # Function to check if a user is logged in
 def check_login(profile: gr.OAuthProfile | None) -> bool:
     if profile is None:
@@ -193,8 +163,11 @@ def check_login(profile: gr.OAuthProfile | None) -> bool:
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
-        value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
@@ -236,7 +209,7 @@ with main_block:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
         with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
             with gr.Column():
@@ -425,7 +398,7 @@ with main_block:
                 show_copy_button=True,
             )
-    main_block.load(fn=get_latest_data_leaderboard, inputs=[leaderboard], outputs=[leaderboard])
     leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
     pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes])
@@ -466,14 +439,9 @@ webhooks_server = enable_space_ci_and_return_server(ui=main_block)
 def update_leaderboard(payload: WebhookPayload) -> None:
     """Redownloads the leaderboard dataset each time it updates"""
     if payload.repo.type == "dataset" and payload.event.action == "update":
-        global NEW_DATA_ON_LEADERBOARD
         logging.info("New data detected, downloading updated leaderboard dataset.")
-        # Mark the flag for new data
-        NEW_DATA_ON_LEADERBOARD = True
         # Now actually download the latest data immediately
-        get_latest_data_leaderboard()
 # The below code is not used at the moment, as we can manage the queue file locally
 LAST_UPDATE_QUEUE = datetime.datetime.now()

     HF_HOME,
 )
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.leaderboard.data import LeaderboardData
 from src.submission.submit import add_new_eval
 from src.voting.vote_system import VoteManager, run_scheduler
 NEW_DATA_ON_LEADERBOARD = True
 LEADERBOARD_DF = None
+leaderboard_data = LeaderboardData()
 def restart_space():
     logging.info(f"Restarting space with repo ID: {REPO_ID}")
     try:
         # Check if new data is pending and download if necessary
         if NEW_DATA_ON_LEADERBOARD:
             logging.info("Fetching latest leaderboard data before restart.")
+            leaderboard_data.update()
         # Now restart the space
         API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
 def get_latest_data_queue():
     eval_queue_dfs = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 def init_space():
     """Initializes the application space, loading only necessary data."""
+    leaderboard_data.update()
     if DO_FULL_INIT:
         # These downloads only occur on full initialization
         except Exception:
             restart_space()
     # Evaluation queue DataFrame retrieval is independent of initialization detail level
     eval_queue_dfs = get_latest_data_queue()
+    return eval_queue_dfs
 # Initialize VoteManager
 vote_manager = VoteManager(VOTES_PATH, EVAL_REQUESTS_PATH, VOTES_REPO)
+schedule.every(15).seconds.do(leaderboard_data.update)
 # Schedule the upload_votes method to run every 15 minutes
 schedule.every(15).minutes.do(vote_manager.upload_votes)
 # Calls init_space(), which refreshes the leaderboard data and checks the module-level DO_FULL_INIT flag
 # to decide whether the full-initialization downloads run. It returns the evaluation queue DataFrames unpacked below.
+eval_queue_dfs = init_space()
 finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = eval_queue_dfs
+<<<<<<< Updated upstream
 # Function to check if a user is logged in
 def check_login(profile: gr.OAuthProfile | None) -> bool:
     if profile is None:
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
+=======
+def make_leaderboard(leaderboard_data: LeaderboardData):
+>>>>>>> Stashed changes
     return Leaderboard(
+        value=leaderboard_data.get_data(),
         datatype=[c.type for c in fields(AutoEvalColumn)],
         select_columns=SelectColumns(
             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = make_leaderboard(leaderboard_data)
         with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
             with gr.Column():
                 show_copy_button=True,
             )
+    # main_block.load(fn=leaderboard_data.get_data, inputs=[leaderboard], outputs=[leaderboard])
     leaderboard.change(fn=get_latest_data_queue, inputs=None, outputs=[finished_eval_table, running_eval_table, pending_eval_table])
     pending_eval_table.change(fn=vote_manager.create_request_vote_df, inputs=[pending_eval_table], outputs=[pending_eval_table_votes])
 def update_leaderboard(payload: WebhookPayload) -> None:
     """Redownloads the leaderboard dataset each time it updates"""
     if payload.repo.type == "dataset" and payload.event.action == "update":
         logging.info("New data detected, downloading updated leaderboard dataset.")
         # Now actually download the latest data immediately
+        leaderboard_data.update()
 # The below code is not used at the moment, as we can manage the queue file locally
 LAST_UPDATE_QUEUE = datetime.datetime.now()

src/leaderboard/data.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import logging
+import datasets
+from src.populate import get_leaderboard_df
+from src.envs import AGGREGATED_REPO, HF_HOME
+from src.display.utils import COLS, BENCHMARK_COLS
+class LeaderboardData:
+    """Holds the leaderboard data and refreshes it from the aggregated dataset repo.
+
+    The stored value is whatever get_leaderboard_df returns. It starts as
+    None and is replaced each time a refresh succeeds; read it through
+    get_data().
+    """
+    def __init__(self):
+        # Double-underscore names are name-mangled by Python (to _LeaderboardData__data),
+        # so outside code is expected to go through get_data() rather than read this directly.
+        self.__data = None
+        self.aggregated_repo = AGGREGATED_REPO  # Dataset repo passed to load_dataset; imported from src.envs
+        self.hf_home = HF_HOME  # Used as load_dataset's cache_dir; imported from src.envs
+        self.cols = COLS  # Forwarded to get_leaderboard_df as cols; imported from src.display.utils
+        self.benchmark_cols = BENCHMARK_COLS  # Forwarded to get_leaderboard_df as benchmark_cols
+    def __update(self):
+        """Download the aggregated dataset and rebuild the stored leaderboard data.
+
+        Returns the freshly built data on success. If anything in the download
+        or processing raises, the error is logged, None is returned, and the
+        previously stored data is left untouched.
+        """
+        try:
+            # FORCE_REDOWNLOAD makes every call fetch the dataset again instead of reusing the cache.
+            leaderboard_dataset = datasets.load_dataset(
+                self.aggregated_repo,
+                "default",
+                split="train",
+                cache_dir=self.hf_home,
+                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
+                verification_mode="no_checks"
+            )
+            # Only assigned once both the download and the processing have succeeded.
+            self.__data = get_leaderboard_df(
+                leaderboard_dataset=leaderboard_dataset,
+                cols=self.cols,
+                benchmark_cols=self.benchmark_cols,
+            )
+            logging.info("Leaderboard dataset successfully downloaded.")
+            return self.__data
+        except Exception as e:
+            # Best-effort refresh: failures are logged (message only, no traceback) and not re-raised.
+            logging.error(f"Failed to download leaderboard dataset: {e}")
+            return None
+    def update(self):
+        """Trigger a refresh of the leaderboard data.
+
+        Returns what the internal refresh returns: the new data on success,
+        or None if the refresh failed.
+        """
+        # NOTE(review): this message is logged before the download is attempted,
+        # so it appears even when the refresh subsequently fails.
+        logging.info("Leaderboard updated at reload!")
+        return self.__update()
+    def get_data(self):
+        """Return the most recently stored leaderboard data (None until a refresh has succeeded)."""
+        return self.__data
+# def get_latest_data_leaderboard(leaderboard_initial_df=None):
+#     global NEW_DATA_ON_LEADERBOARD
+#     global LEADERBOARD_DF
+#     if NEW_DATA_ON_LEADERBOARD:
+#         logging.info("Leaderboard updated at reload!")
+#         try:
+#             leaderboard_dataset = datasets.load_dataset(
+#                 AGGREGATED_REPO,
+#                 "default",
+#                 split="train",
+#                 cache_dir=HF_HOME,
+#                 download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # Always download fresh data
+#                 verification_mode="no_checks"
+#             )
+#             LEADERBOARD_DF = get_leaderboard_df(
+#                 leaderboard_dataset=leaderboard_dataset,
+#                 cols=COLS,
+#                 benchmark_cols=BENCHMARK_COLS,
+#             )
+#             logging.info("Leaderboard dataset successfully downloaded.")
+#         except Exception as e:
+#             logging.error(f"Failed to download leaderboard dataset: {e}")
+#             return
+#         # Reset the flag after successful download
+#         NEW_DATA_ON_LEADERBOARD = False
+#     else:
+#         LEADERBOARD_DF = leaderboard_initial_df
+#         logging.info("Using cached leaderboard dataset.")
+#     return LEADERBOARD_DF