Spaces:

andrewrreed
/

closed-vs-open-arena-elo

Running on CPU Upgrade

App Files Files Community

closed-vs-open-arena-elo / utils.py

andrewrreed HF staff

add new categories

6fbf558 6 months ago

raw

history blame

6.72 kB

	import json
	from datetime import datetime

	from typing import Literal, List

	import pandas as pd
	from huggingface_hub import HfFileSystem, hf_hub_download

	# Maps short category keys to the human-readable category names used for display.
	# The last entry has no dedicated display name and simply reuses its key.
	# from: https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/monitor/monitor.py#L389
	KEY_TO_CATEGORY_NAME = {
	"full": "Overall",
	"dedup": "De-duplicate Top Redundant Queries (soon to be default)",
	"coding": "Coding",
	"hard_6": "Hard Prompts (Overall)",
	"hard_english_6": "Hard Prompts (English)",
	"long_user": "Longer Query",
	"english": "English",
	"chinese": "Chinese",
	"french": "French",
	"no_tie": "Exclude Ties",
	"no_short": "Exclude Short Query (< 5 tokens)",
	"no_refusal": "Exclude Refusal",
	"overall_limit_5_user_vote": "overall_limit_5_user_vote",
	}
	# Maps each category display name (the values of KEY_TO_CATEGORY_NAME) to a
	# longer explanation string. Some explanations embed Markdown links.
	CAT_NAME_TO_EXPLANATION = {
	"Overall": "Overall Questions",
	"De-duplicate Top Redundant Queries (soon to be default)": "De-duplicate top redundant queries (top 0.1%). See details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/#note-enhancing-quality-through-de-duplication).",
	"Coding": "Coding: whether conversation contains code snippets",
	"Hard Prompts (Overall)": "Hard Prompts (Overall): details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
	"Hard Prompts (English)": "Hard Prompts (English), note: the delta is to English Category. details in [blog post](https://lmsys.org/blog/2024-05-17-category-hard/)",
	"Longer Query": "Longer Query (>= 500 tokens)",
	"English": "English Prompts",
	"Chinese": "Chinese Prompts",
	"French": "French Prompts",
	"Exclude Ties": "Exclude Ties and Bothbad",
	"Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
	"Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
	"overall_limit_5_user_vote": "overall_limit_5_user_vote",
	}

	# License values that format_data relabels as "Proprietary LLM"; every other
	# value becomes "Open LLM".
	# NOTE(review): "Proprietory" presumably covers a misspelling present in the
	# upstream data - confirm before removing it.
	PROPRIETARY_LICENSES = ["Proprietary", "Proprietory"]


	def download_latest_data_from_space(
	    repo_id: str, file_type: Literal["pkl", "csv"]
	) -> str:
	    """
	    Download the latest data file of the given type from a repository space.

	    Files matching ``spaces/{repo_id}/*.{file_type}`` are ranked by the text
	    that follows the last underscore of their base name (the part before the
	    first dot). The file whose suffix compares highest as a string is
	    downloaded with ``hf_hub_download``.

	    Args:
	        repo_id (str): The ID of the repository space.
	        file_type (Literal["pkl", "csv"]): The extension of the data file to
	            download. Must be either "pkl" or "csv".

	    Returns:
	        str: The local file path of the downloaded data file.

	    Raises:
	        FileNotFoundError: If the space has no file with the requested
	            extension.
	    """

	    def extract_date(filename):
	        # "some/dir/name_<suffix>.ext" -> "<suffix>"
	        # NOTE(review): assumes the suffix is a date that sorts correctly as a
	        # string (e.g. YYYYMMDD) - confirm against the files in the space.
	        return filename.split("/")[-1].split(".")[0].split("_")[-1]

	    fs = HfFileSystem()
	    data_file_path = f"spaces/{repo_id}/*.{file_type}"
	    files = fs.glob(data_file_path)

	    # Fail with an explicit message instead of an opaque IndexError/ValueError
	    # when nothing matches the pattern.
	    if not files:
	        raise FileNotFoundError(
	            f"No '.{file_type}' files found in space '{repo_id}' "
	            f"(pattern: {data_file_path})"
	        )

	    # max() returns the first file with the highest suffix, which is the same
	    # element a stable descending sort would place first.
	    latest_file = max(files, key=extract_date)

	    return hf_hub_download(
	        repo_id=repo_id,
	        filename=latest_file.split("/")[-1],
	        repo_type="space",
	    )


	def get_constants(dfs):
	"""
	Calculate and return the minimum and maximum Elo scores, as well as the maximum number of models per month.

	Parameters:
	- dfs (dict): A dictionary containing DataFrames for different categories.

	Returns:
	- min_elo_score (float): The minimum Elo score across all DataFrames.
	- max_elo_score (float): The maximum Elo score across all DataFrames.
	- upper_models_per_month (int): The maximum number of models per month per license across all DataFrames.
	"""
	filter_ranges = {}
	for k, df in dfs.items():
	filter_ranges[k] = {
	"min_elo_score": df["rating"].min().round(),
	"max_elo_score": df["rating"].max().round(),
	"upper_models_per_month": int(
	df.groupby(["Month-Year", "License"])["rating"]
	.apply(lambda x: x.count())
	.max()
	),
	}

	min_elo_score = float("inf")
	max_elo_score = float("-inf")
	upper_models_per_month = 0

	for _, value in filter_ranges.items():
	min_elo_score = min(min_elo_score, value["min_elo_score"])
	max_elo_score = max(max_elo_score, value["max_elo_score"])
	upper_models_per_month = max(
	upper_models_per_month, value["upper_models_per_month"]
	)
	return min_elo_score, max_elo_score, upper_models_per_month


	def update_release_date_mapping(
	    new_model_keys_to_add: List[str],
	    leaderboard_df: pd.DataFrame,
	    release_date_mapping: pd.DataFrame,
	) -> pd.DataFrame:
	    """
	    Update the release date mapping with new model keys.

	    For every new key, an entry holding the key, the model name looked up in
	    ``leaderboard_df`` and today's date as the release date is appended to
	    ``release_date_mapping.json`` in the current working directory. The file
	    is read once and written once, however many keys are added, and the
	    mapping is then reloaded from it.

	    Args:
	        new_model_keys_to_add (List[str]): A list of new model keys to add to
	            the release date mapping.
	        leaderboard_df (pd.DataFrame): The leaderboard DataFrame; its "key"
	            and "Model" columns are used to resolve each model name.
	        release_date_mapping (pd.DataFrame): The current release date mapping
	            DataFrame. Returned unchanged when there is nothing to add.

	    Returns:
	        pd.DataFrame: The updated release date mapping DataFrame.

	    Raises:
	        IndexError: If a key has no matching row in ``leaderboard_df``. The
	            JSON file is left untouched in that case.
	    """
	    # Nothing to add: leave the file alone and return the mapping as given.
	    if not new_model_keys_to_add:
	        return release_date_mapping

	    mapping_path = "release_date_mapping.json"
	    today = datetime.today().strftime("%Y-%m-%d")

	    # Build every entry before touching the file so a failed lookup cannot
	    # leave a partially updated mapping on disk.
	    new_entries = [
	        {
	            "key": key,
	            "Model": leaderboard_df[leaderboard_df["key"] == key]["Model"].values[0],
	            "Release Date": today,
	        }
	        for key in new_model_keys_to_add
	    ]

	    with open(mapping_path, "r", encoding="utf-8") as file:
	        data = json.load(file)

	    data.extend(new_entries)

	    with open(mapping_path, "w", encoding="utf-8") as file:
	        json.dump(data, file, indent=4)

	    for key in new_model_keys_to_add:
	        print(f"Added {key} to release_date_mapping.json")

	    # Reload the release date mapping so the result reflects what is on disk.
	    return pd.read_json(mapping_path, orient="records")


	def format_data(df):
	    """
	    Normalise the license, release date and rating columns of a DataFrame.

	    The steps, applied in order:
	    - 'License' becomes 'Proprietary LLM' when the value is in
	      PROPRIETARY_LICENSES and 'Open LLM' otherwise.
	    - 'Release Date' is converted to datetime.
	    - 'Month-Year' is added as the monthly period of 'Release Date'.
	    - 'rating' is rounded to the nearest integer.
	    - The index is reset, dropping the old one.

	    Note that the column changes are written to the DataFrame passed in;
	    only the index reset produces a new object.

	    Args:
	        df (pandas.DataFrame): The DataFrame to be formatted.

	    Returns:
	        pandas.DataFrame: The formatted DataFrame with a fresh index.
	    """

	    def _license_group(license_name):
	        # Collapse the raw license string into one of two buckets.
	        if license_name in PROPRIETARY_LICENSES:
	            return "Proprietary LLM"
	        return "Open LLM"

	    df["License"] = df["License"].map(_license_group)

	    release_dates = pd.to_datetime(df["Release Date"])
	    df["Release Date"] = release_dates
	    df["Month-Year"] = release_dates.dt.to_period("M")

	    df["rating"] = df["rating"].round()

	    formatted = df.reset_index(drop=True)
	    return formatted