from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    name: str  # qa, long_doc


@dataclass
class Metric:
    name: str  # ndcg_at_1


@dataclass
class Language:
    name: str  # en, zh


@dataclass
class Domain:
    name: str  # law, wiki


@dataclass
class EmbeddingModel:
    full_name: str  # jinaai/jina-embeddings-v2-en-base
    org: str  # jinaai
    model: str  # jina-embeddings-v2-en-base
    size: int  # size (millions of parameters)
    dim: int  # output dimensions
    max_tokens: int  # max tokens
    model_type: str  # open, proprietary, sentence transformers


NUM_FEWSHOT = 0  # Change with your few-shot setting
# ---------------------------------------------------

# Your leaderboard name
TITLE = """

AIR-Bench

""" # What does your leaderboard evaluate? INTRODUCTION_TEXT = """ AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark """ # Which evaluations are you running? how can people reproduce what you have? LLM_BENCHMARKS_TEXT = f""" ## How it works ## Reproducibility To reproduce our results, here is the commands you can run: """ EVALUATION_QUEUE_TEXT = """ ## Some good practices before submitting a model ### 1) ### 2) ### 3) ### 4) ## In case of model failure If your model is displayed in the `FAILED` category, its execution stopped. Make sure you have followed the above steps first. If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task). """ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" CITATION_BUTTON_TEXT = r""" """