Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
# ruff: noqa: F405, F403, F401 | |
""" | |
Custom evaluation tasks for lighteval. Copy this file and complete it with the info for your task. | |
This file generally creates just a TASKS_TABLE and TASKS_GROUPS, which are then imported by LightEval.
Author: | |
""" | |
from lighteval.tasks.lighteval_task import LightevalTaskConfig | |
from lighteval.tasks.requests import Doc | |
from lighteval.tasks.tasks_prompt_formatting import LETTER_INDICES | |
## EVAL WITH NO SUBSET ## | |
# This is how you create a simple task (like hellaswag), which has a single subset
# attached to it and one possible evaluation.
# Template config for a task with a single subset. The empty strings and lists
# are placeholders to complete with the info for your own task.
task = LightevalTaskConfig(
    # Task identity
    name="myothertask",
    suite=["community"],
    # Prompt function: must be defined in the file or imported from
    # src/lighteval/tasks/tasks_prompt_formatting.py
    prompt_function="prompt_fn",
    # Dataset repo and subset
    hf_repo="",
    hf_subset="default",
    # Splits
    hf_avail_splits=[],
    evaluation_splits=[],
    # Few-shot settings
    few_shots_split="",
    few_shots_select="",
    # Metrics
    metric=[""],
)
## EVALS WITH SUBSET | |
# This is how you create a subset task (like MMLU), which has several subsets,
# each being its own evaluation task.
# fmt: off
# One CustomSubsetTask is created per entry of this list (see SUBSET_TASKS below);
# each entry is used both as the `hf_subset` value and as the suffix of the task name.
# Presumably entries are subset names as strings -- confirm against your dataset.
SAMPLE_SUBSETS = []  # list of all the subsets to use for this eval
# fmt: on
class CustomSubsetTask(LightevalTaskConfig):
    """Task config template shared by every subset of a multi-subset eval.

    Only the task name and the subset differ between instances; every other
    setting is fixed in ``__init__`` and is a placeholder to complete for
    your own task.
    """

    def __init__(self, name, hf_subset):
        """Build the config for one subset.

        Args:
            name: Task name forwarded to ``LightevalTaskConfig``.
            hf_subset: Subset identifier forwarded to ``LightevalTaskConfig``.
        """
        # Settings common to all subsets of this eval.
        shared_settings = {
            "suite": ["community"],
            "prompt_function": "prompt_fn",  # must be defined in the file
            "hf_repo": "",
            "hf_avail_splits": [],
            "evaluation_splits": [],
            "few_shots_split": "",
            "few_shots_select": "",
            "metric": [""],
            "generation_size": -1,
            "stop_sequence": None,
            "output_regex": None,
            "frozen": False,
        }
        super().__init__(name=name, hf_subset=hf_subset, **shared_settings)
## DEFINE YOUR PROMPT FUNCTIONS | |
# Define as many as you need for your different tasks | |
def prompt_fn(line, task_name: str = None):
    """Turn one dataset line into a ``Doc`` object.

    This is a template: the returned ``Doc`` only carries ``task_name`` plus
    empty placeholder fields and does not read ``line`` yet. See the examples
    in src/lighteval/tasks/tasks_prompt_formatting.py, or the README, for
    what a real prompt function should produce.

    Args:
        line: A single line (row) of the dataset.
        task_name: Name of the task, passed through to the ``Doc``.

    Returns:
        The ``Doc`` built for this line.
    """
    doc = Doc(
        task_name=task_name,
        instruction="",
        query="",
        choices="",
        gold_index=0,
    )
    return doc
## STORE YOUR EVALS | |
# One config per entry of SAMPLE_SUBSETS, named "mytask:<subset>".
SUBSET_TASKS = [
    CustomSubsetTask(name=f"mytask:{subset_name}", hf_subset=subset_name)
    for subset_name in SAMPLE_SUBSETS
]
# Every task defined in this file: the per-subset tasks first, then the
# single-subset task.
_TASKS = [*SUBSET_TASKS, task]
## MODULE LOGIC | |
# You should not need to touch this | |
# Convert to dict for lighteval | |
# Table imported by lighteval: the dict form of each task config in _TASKS.
TASKS_TABLE = [config.as_dict() for config in _TASKS]

if __name__ == "__main__":
    # Running the file directly lists the registered task names and their count.
    # Build a real list here: passing a bare generator expression to print()
    # shows the generator object's repr rather than the names.
    print([t["name"] for t in TASKS_TABLE])
    print(len(TASKS_TABLE))