Spaces:

andrewrreed
/

closed-vs-open-arena-elo

Running on CPU Upgrade

App Files Files Community

closed-vs-open-arena-elo / utils.py

andrewrreed HF staff

Add filters

167137b 5 months ago

raw

history blame

No virus

1.87 kB

	from typing import Literal

	from huggingface_hub import HfFileSystem, hf_hub_download

	# Short category keys mapped to their human-readable category names.
	# Insertion order is kept as-is in case callers iterate over the mapping.
	KEY_TO_CATEGORY_NAME = dict(
	    full="Overall",
	    coding="Coding",
	    long_user="Longer Query",
	    english="English",
	    chinese="Chinese",
	    french="French",
	    no_tie="Exclude Ties",
	    no_short="Exclude Short Query (< 5 tokens)",
	    no_refusal="Exclude Refusal",
	)
	# Category display name -> longer explanation text for that category.
	# The keys are the same strings that appear as values in KEY_TO_CATEGORY_NAME.
	CAT_NAME_TO_EXPLANATION = dict(
	    (
	        ("Overall", "Overall Questions"),
	        ("Coding", "Coding: whether conversation contains code snippets"),
	        ("Longer Query", "Longer Query (>= 500 tokens)"),
	        ("English", "English Prompts"),
	        ("Chinese", "Chinese Prompts"),
	        ("French", "French Prompts"),
	        ("Exclude Ties", "Exclude Ties and Bothbad"),
	        (
	            "Exclude Short Query (< 5 tokens)",
	            "Exclude Short User Query (< 5 tokens)",
	        ),
	        (
	            "Exclude Refusal",
	            'Exclude model responses with refusal (e.g., "I cannot answer")',
	        ),
	    )
	)

	# License strings that are considered proprietary. Kept as a list so more
	# labels can be appended without changing the type callers see.
	PROPRIETARY_LICENSES = ["Proprietary"]


	def download_latest_data_from_space(
	    repo_id: str, file_type: Literal["pkl", "csv"]
	) -> str:
	    """
	    Download the latest data file of the given type from a Hugging Face Space.

	    Candidate files are those matching the glob
	    ``spaces/{repo_id}/*.{file_type}``. They are ranked by the token that
	    follows the last underscore in the file's base name (the part before the
	    first dot), e.g. ``name_20240101.csv`` -> ``"20240101"``. Tokens are
	    compared as plain strings, so the ranking is only chronological when the
	    suffix is a fixed-width, lexicographically sortable stamp (presumably
	    ``YYYYMMDD`` -- confirm against the files actually stored in the Space).

	    Args:
	        repo_id (str): The ID of the repository space.
	        file_type (Literal["pkl", "csv"]): The extension of the data file to
	            download. Must be either "pkl" or "csv".

	    Returns:
	        str: The local file path of the downloaded data file.

	    Raises:
	        IndexError: If no file in the Space matches ``*.{file_type}``. The
	            exception type is the one the previous implementation raised
	            implicitly; it now carries a descriptive message.
	    """

	    def extract_date(filename):
	        # "spaces/<repo>/<prefix>_<stamp>.<ext>" -> "<stamp>"
	        return filename.split("/")[-1].split(".")[0].split("_")[-1]

	    fs = HfFileSystem()
	    files = fs.glob(f"spaces/{repo_id}/*.{file_type}")
	    if not files:
	        # Fail with an actionable message instead of an opaque
	        # "list index out of range" when nothing matches.
	        raise IndexError(
	            f"No '*.{file_type}' files found in space '{repo_id}'"
	        )

	    # max() returns the first maximal element, which matches the result of
	    # a stable descending sort followed by [0], without sorting everything.
	    latest_file = max(files, key=extract_date)

	    # hf_hub_download expects a path relative to the repo root, so strip the
	    # "spaces/<repo_id>/" prefix that the filesystem listing includes.
	    return hf_hub_download(
	        repo_id=repo_id,
	        filename=latest_file.split("/")[-1],
	        repo_type="space",
	    )