from typing import Literal

from huggingface_hub import HfFileSystem, hf_hub_download

# Maps short category keys to their human-readable category names.
KEY_TO_CATEGORY_NAME = {
    "full": "Overall",
    "coding": "Coding",
    "long_user": "Longer Query",
    "english": "English",
    "chinese": "Chinese",
    "french": "French",
    "no_tie": "Exclude Ties",
    "no_short": "Exclude Short Query (< 5 tokens)",
    "no_refusal": "Exclude Refusal",
}

# Maps each category name (the values of KEY_TO_CATEGORY_NAME) to a longer
# explanation string.
CAT_NAME_TO_EXPLANATION = {
    "Overall": "Overall Questions",
    "Coding": "Coding: whether conversation contains code snippets",
    "Longer Query": "Longer Query (>= 500 tokens)",
    "English": "English Prompts",
    "Chinese": "Chinese Prompts",
    "French": "French Prompts",
    "Exclude Ties": "Exclude Ties and Bothbad",
    "Exclude Short Query (< 5 tokens)": "Exclude Short User Query (< 5 tokens)",
    "Exclude Refusal": 'Exclude model responses with refusal (e.g., "I cannot answer")',
}

# License labels treated as proprietary.
# NOTE(review): consumers of this list are outside this file view.
PROPRIETARY_LICENSES = [
    "Proprietary",
]


def download_latest_data_from_space(
    repo_id: str, file_type: Literal["pkl", "csv"]
) -> str:
    """
    Download the most recent data file of the given type from a Space.

    The files at the root of the Space matching ``*.{file_type}`` are listed,
    and the one whose name ends in the greatest suffix is downloaded. The
    suffix is the text after the last underscore in the part of the file name
    before the first dot (e.g. ``"20240101"`` for ``"results_20240101.pkl"``).
    Suffixes are compared as plain strings, so the naming scheme is assumed to
    sort chronologically under string comparison (e.g. ``YYYYMMDD``) -- TODO
    confirm against the files actually published in the Space.

    Args:
        repo_id (str): The ID of the repository space.
        file_type (Literal["pkl", "csv"]): The type of the data file to
            download. Must be either "pkl" or "csv".

    Returns:
        str: The local file path of the downloaded data file.

    Raises:
        IndexError: If the Space contains no file with the requested
            extension. The exception type matches what the previous
            implementation raised, but now carries a descriptive message.
    """

    def extract_date(filename: str) -> str:
        # "spaces/<repo>/name_20240101.pkl" -> "name_20240101.pkl"
        base_name = filename.split("/")[-1]
        # Drop everything from the first dot on, then keep the text after the
        # last underscore.
        return base_name.split(".")[0].split("_")[-1]

    fs = HfFileSystem()
    data_file_path = f"spaces/{repo_id}/*.{file_type}"
    files = fs.glob(data_file_path)

    if not files:
        # Fail with an actionable message instead of an opaque
        # "list index out of range".
        raise IndexError(
            f"No '*.{file_type}' files found in space '{repo_id}' "
            f"(pattern: {data_file_path})"
        )

    # max() returns the first maximal element, the same one that
    # sorted(..., reverse=True)[0] selects, without sorting the whole list.
    latest_file = max(files, key=extract_date)

    latest_filepath_local = hf_hub_download(
        repo_id=repo_id,
        # The glob only matches files at the Space root, so the base name is
        # the file's path inside the repository.
        filename=latest_file.split("/")[-1],
        repo_type="space",
    )
    return latest_filepath_local