# Source page metadata (not part of the module's code):
# alozowski
# Improved leaderboard update [wip]
# 34f418b
# raw
# history blame
# 2.97 kB
import logging
import datasets
from src.populate import get_leaderboard_df
from src.envs import AGGREGATED_REPO, HF_HOME
from src.display.utils import COLS, BENCHMARK_COLS
class LeaderboardData:
    """Hold the leaderboard table and refresh it from the aggregated dataset repo.

    The table is whatever ``get_leaderboard_df`` builds from the ``train``
    split of the ``default`` config of ``aggregated_repo``. Until the first
    successful :meth:`update`, :meth:`get_data` returns ``None``.
    """

    def __init__(self):
        # Last successfully built leaderboard table; stays None until an
        # update succeeds.
        self.__data = None
        # Settings are copied from the project's env/display modules at
        # construction time.
        self.aggregated_repo = AGGREGATED_REPO
        self.hf_home = HF_HOME
        self.cols = COLS
        self.benchmark_cols = BENCHMARK_COLS

    def __update(self):
        """Download the aggregated dataset and rebuild the leaderboard table.

        Returns:
            The freshly built table, or ``None`` if downloading or processing
            failed. On failure the previously stored table is left untouched.
        """
        try:
            leaderboard_dataset = datasets.load_dataset(
                self.aggregated_repo,
                "default",
                split="train",
                cache_dir=self.hf_home,
                # Always fetch fresh data rather than reusing a cached copy.
                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
                verification_mode="no_checks",
            )
            self.__data = get_leaderboard_df(
                leaderboard_dataset=leaderboard_dataset,
                cols=self.cols,
                benchmark_cols=self.benchmark_cols,
            )
        except Exception as e:
            # Deliberately best-effort: a failed refresh must not crash the
            # caller. logging.exception keeps the traceback so a download
            # failure can be told apart from a processing failure.
            logging.exception("Failed to download leaderboard dataset: %s", e)
            return None
        else:
            logging.info("Leaderboard dataset successfully downloaded.")
            return self.__data

    def update(self):
        """Refresh the leaderboard data.

        Returns:
            The refreshed table, or ``None`` if the refresh failed (in which
            case :meth:`get_data` keeps returning the previous table).
        """
        refreshed = self.__update()
        # Only announce the update once it has actually happened; a failed
        # refresh is already reported by __update().
        if refreshed is not None:
            logging.info("Leaderboard updated at reload!")
        return refreshed

    def get_data(self):
        """Return the most recently stored leaderboard table, or ``None`` if none was built yet."""
        return self.__data
# NOTE(review): legacy module-level implementation kept for reference; it
# appears to be superseded by LeaderboardData above -- confirm before deleting.
#
# def get_latest_data_leaderboard(leaderboard_initial_df=None):
#     global NEW_DATA_ON_LEADERBOARD
#     global LEADERBOARD_DF
#     if NEW_DATA_ON_LEADERBOARD:
#         logging.info("Leaderboard updated at reload!")
#         try:
#             leaderboard_dataset = datasets.load_dataset(
#                 AGGREGATED_REPO,
#                 "default",
#                 split="train",
#                 cache_dir=HF_HOME,
#                 download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,  # Always download fresh data
#                 verification_mode="no_checks"
#             )
#             LEADERBOARD_DF = get_leaderboard_df(
#                 leaderboard_dataset=leaderboard_dataset,
#                 cols=COLS,
#                 benchmark_cols=BENCHMARK_COLS,
#             )
#             logging.info("Leaderboard dataset successfully downloaded.")
#         except Exception as e:
#             logging.error(f"Failed to download leaderboard dataset: {e}")
#             return
#         # Reset the flag after successful download
#         NEW_DATA_ON_LEADERBOARD = False
#     else:
#         LEADERBOARD_DF = leaderboard_initial_df
#         logging.info("Using cached leaderboard dataset.")
#     return LEADERBOARD_DF