Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Files Community

1024

open_llm_leaderboard / src /leaderboard /data.py

alozowski

Improved leaderboard update [wip]

34f418b about 1 month ago

raw

history blame

2.97 kB

	import logging
	import datasets
	from src.populate import get_leaderboard_df
	from src.envs import AGGREGATED_REPO, HF_HOME
	from src.display.utils import COLS, BENCHMARK_COLS

	class LeaderboardData:
	    """Cache the processed leaderboard table and refresh it on demand.

	    The table is built by downloading the aggregated dataset repo with
	    ``datasets.load_dataset`` and passing the result to
	    ``get_leaderboard_df``. ``update`` triggers a refresh; ``get_data``
	    returns whatever the last successful refresh produced.
	    """

	    def __init__(self):
	        # Nothing is downloaded at construction time: the cache stays None
	        # until a refresh succeeds.
	        self.__data = None
	        # Configuration is copied from the project's settings modules
	        # (src.envs and src.display.utils) onto the instance.
	        self.aggregated_repo = AGGREGATED_REPO
	        self.hf_home = HF_HOME
	        self.cols = COLS
	        self.benchmark_cols = BENCHMARK_COLS

	    def __update(self):
	        """Download the aggregated dataset and rebuild the cached table.

	        Returns:
	            The newly built table on success, or ``None`` when the download
	            or the processing step raised. On failure the previously cached
	            table is left untouched.
	        """
	        try:
	            leaderboard_dataset = datasets.load_dataset(
	                self.aggregated_repo,
	                "default",
	                split="train",
	                cache_dir=self.hf_home,
	                # Always fetch fresh data rather than reusing a cached copy.
	                download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
	                verification_mode="no_checks",
	            )
	            refreshed = get_leaderboard_df(
	                leaderboard_dataset=leaderboard_dataset,
	                cols=self.cols,
	                benchmark_cols=self.benchmark_cols,
	            )
	        except Exception as e:
	            # Deliberately best-effort: a failed refresh must not take the
	            # caller down. logging.exception keeps the traceback so download
	            # errors can be told apart from processing errors.
	            logging.exception("Failed to download leaderboard dataset: %s", e)
	            return None

	        # Only replace the cache once both steps have succeeded.
	        self.__data = refreshed
	        logging.info("Leaderboard dataset successfully downloaded.")
	        return self.__data

	    def update(self):
	        """Trigger a refresh of the leaderboard data.

	        Returns:
	            The refreshed table, or ``None`` if the refresh failed.
	        """
	        # NOTE: this message is emitted before the refresh is attempted, so
	        # it is logged even when the refresh subsequently fails.
	        logging.info("Leaderboard updated at reload!")
	        return self.__update()

	    def get_data(self):
	        """Return the cached table, or ``None`` if no refresh has succeeded yet."""
	        return self.__data


	# def get_latest_data_leaderboard(leaderboard_initial_df=None):
	# global NEW_DATA_ON_LEADERBOARD
	# global LEADERBOARD_DF
	# if NEW_DATA_ON_LEADERBOARD:
	# logging.info("Leaderboard updated at reload!")
	# try:
	# leaderboard_dataset = datasets.load_dataset(
	# AGGREGATED_REPO,
	# "default",
	# split="train",
	# cache_dir=HF_HOME,
	# download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD, # Always download fresh data
	# verification_mode="no_checks"
	# )
	# LEADERBOARD_DF = get_leaderboard_df(
	# leaderboard_dataset=leaderboard_dataset,
	# cols=COLS,
	# benchmark_cols=BENCHMARK_COLS,
	# )
	# logging.info("Leaderboard dataset successfully downloaded.")
	# except Exception as e:
	# logging.error(f"Failed to download leaderboard dataset: {e}")
	# return

	# # Reset the flag after successful download
	# NEW_DATA_ON_LEADERBOARD = False
	# else:
	# LEADERBOARD_DF = leaderboard_initial_df
	# logging.info("Using cached leaderboard dataset.")
	# return LEADERBOARD_DF