# Spaces: Running on CPU Upgrade
from enum import Enum, auto
#from dataclasses import dataclass
# Tag enum with one member per submission mode (presumably how models get
# onto a leaderboard -- inferred from the member names, confirm with the UI).
# Built with the functional Enum API, so members receive the integer values
# 1..5 in the order listed below.
SubmissionType = Enum(
    "SubmissionType",
    [
        "Automatic",
        "SemiAutomatic",
        "Manual",
        "Closed",
        "Arena",
    ],
)
# Tag enum naming who or what produces the scores (inferred from the member
# names -- confirm). Functional Enum API: members get the values 1..3 in the
# order listed.
Evaluators = Enum(
    "Evaluators",
    [
        "Humans",  # original note: "Arena"
        "Automatic",
        "Model",
    ],
)
# Tag enum describing the test set of a leaderboard (presumably its
# visibility / refresh policy -- inferred from the member names, confirm).
# Functional Enum API: members get the values 1..5 in the order listed.
TestSet = Enum(
    "TestSet",
    [
        "Private",
        "Public",
        "Mix",
        "Rolling",
        # "N/A" is not a valid Python identifier, so this member cannot be
        # written as `TestSet.N/A`; look it up with `TestSet["N/A"]`.
        "N/A",
    ],
)
# Tag enum of topical categories that can be attached to a leaderboard.
# Functional Enum API: members get the values 1..14 in the order listed.
Categories = Enum(
    "Categories",
    [
        "Text",
        "Image",
        "Audio",
        "Video",
        "Multimodal",
        "Generation",
        "Math",
        "Code",
        "LanguageSpecific",
        "Performance",
        "Safety",
        "VibeCheck",
        "Tools",
        "Artefacts",
    ],
)
# Tag enum of natural languages that can be attached to a leaderboard.
# Functional Enum API: members get the values 1..8 in the order listed.
Languages = Enum(
    "Languages",
    [
        "Chinese",
        "Korean",
        "Dutch",
        "Portuguese",
        "Italian",
        "Malay",
        "Polish",
        "Turkish",
    ],
)
# Maps a leaderboard identifier (string key; presumably a Hugging Face
# "owner/space" id -- confirm) to the list of tags attached to it.
#
# Tags are members of SubmissionType, Evaluators, TestSet, Categories and
# Languages, in no fixed order, and not every entry has a tag from every enum.
#
# NOTE(review): four tags are plain strings rather than enum members
# ("Embeddings", "Hallucinations", "OCR", "Models"); consumers must handle
# both `Enum` members and `str` values when reading these lists.
# NOTE(review): the Korean and Dutch entries carry a Languages tag but, unlike
# the other language-tagged entries, no Categories.LanguageSpecific -- confirm
# whether that omission is intentional.
leaderboard_to_tags = {
    "HuggingFaceH4/open_llm_leaderboard": [SubmissionType.Automatic, Evaluators.Automatic, TestSet.Public, Categories.Text, Categories.Math],
    "bigcode/bigcode-models-leaderboard": [SubmissionType.SemiAutomatic, Evaluators.Automatic, TestSet.Public, Categories.Code],
    "optimum/llm-perf-leaderboard": [SubmissionType.Manual, Evaluators.Automatic, Categories.Performance],
    "lmsys/chatbot-arena-leaderboard": [SubmissionType.Arena, Evaluators.Humans, Categories.Text, Categories.Generation],
    "llmonitor/benchmarks": [SubmissionType.Manual, Evaluators.Humans, Categories.Text, Categories.VibeCheck],
    "mteb/leaderboard": [SubmissionType.SemiAutomatic, Categories.Text, "Embeddings", Categories.Artefacts],
    "gaia-benchmark/leaderboard": [SubmissionType.Automatic, TestSet.Private, Evaluators.Automatic, Categories.Text, Categories.Tools, Categories.Multimodal],
    "opencompass/opencompass-llm-leaderboard": [SubmissionType.Manual, Categories.Text, Categories.LanguageSpecific, Languages.Chinese],
    "upstage/open-ko-llm-leaderboard": [SubmissionType.Automatic, Evaluators.Automatic, TestSet.Mix, Categories.Text, Languages.Korean],
    "BramVanroy/open_dutch_llm_leaderboard": [SubmissionType.Manual, Evaluators.Automatic, Categories.Text, Languages.Dutch],
    "vectara/leaderboard": [SubmissionType.SemiAutomatic, Evaluators.Model, Categories.Text, "Hallucinations"],
    "facebook/CyberSecEval": [SubmissionType.Closed, Categories.Code, Categories.Safety],
    "mlabonne/Yet_Another_LLM_Leaderboard": [SubmissionType.Manual, Categories.Text, Evaluators.Automatic],
    "AI-Secure/llm-trustworthy-leaderboard": [SubmissionType.Automatic, Categories.Safety, Categories.Text],
    "AILab-CVC/EvalCrafter": [SubmissionType.Closed, Categories.Video, Categories.Generation],
    "mike-ravkine/can-ai-code-results": [SubmissionType.Closed, Categories.Code],
    "echo840/ocrbench-leaderboard": [SubmissionType.Closed, Categories.Image, "OCR"],
    "NPHardEval/NPHardEval-leaderboard": [SubmissionType.Closed, Categories.Text, Categories.Math, TestSet.Rolling],
    "HaizeLabs/red-teaming-resistance-benchmark": [SubmissionType.Manual, Categories.Safety, Categories.Text],
    "devingulliver/subquadratic-llm-leaderboard": [SubmissionType.SemiAutomatic, Categories.Text, Categories.Math],
    "WildVision/vision-arena": [SubmissionType.Arena, Categories.Image, Categories.Multimodal],
    "Vchitect/VBench_Leaderboard": [SubmissionType.SemiAutomatic, Categories.Video, Categories.Generation],
    "eduagarcia/open_pt_llm_leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Portuguese],
    "FinancialSupport/open_ita_llm_leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Italian],
    "mesolitica/malay-llm-leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Malay],
    "TIGER-Lab/GenAI-Arena": [Categories.Image, Categories.Generation, Evaluators.Humans, SubmissionType.Arena],
    "q-future/Q-Bench-Leaderboard": [Categories.Image, Evaluators.Automatic, SubmissionType.Closed],
    "OpenGenAI/parti-prompts-leaderboard": [Categories.Image, Categories.Generation, SubmissionType.Arena, Evaluators.Humans],
    "speakleash/open_pl_llm_leaderboard": [Categories.LanguageSpecific, Categories.Text, Languages.Polish],
    "malhajar/OpenLLMTurkishLeaderboard": [Categories.LanguageSpecific, Categories.Text, Languages.Turkish],
    "allenai/WildBench": [Evaluators.Humans, SubmissionType.Arena, Evaluators.Model, Categories.Text, Categories.Generation],
    "hf-audio/open_asr_leaderboard": [Evaluators.Automatic, Categories.Audio],
    "opencompass/open_vlm_leaderboard": [Evaluators.Automatic, Categories.Generation, Categories.Image],
    "livecodebench/benchmarks": [Evaluators.Automatic, Categories.Code],
    "allenai/reward-bench": [Evaluators.Automatic, Categories.Artefacts, "Models", Categories.Text],
    "TTS-AGI/TTS-Arena": [Evaluators.Humans, Categories.Audio],
}