"""Evaluation-suite definition: the ``SubTask`` record type and the ``suite`` list."""

from dataclasses import dataclass
from typing import Callable, Optional, Union

from datasets import Dataset


@dataclass
class SubTask:
    """Describe a single evaluation task of a suite.

    Every field is optional and defaults to ``None``.

    Attributes:
        model_or_pipeline: Model identifier string, a callable, or a
            pipeline/model object (the non-stdlib types are forward
            references that are not imported in this module).
        data: Dataset name or an already constructed ``Dataset``.
        subset: Optional dataset subset name (presumably a dataset
            configuration name -- confirm against the consumer).
        split: Split selector string, e.g. ``"test"`` or ``"test[:10]"``.
        data_preprocessor: Optional callable applied to the data before
            evaluation; the exact call convention is defined by the code
            that consumes this record, not here.
        args_for_task: Optional dict of task arguments; the entries in
            ``suite`` use the keys ``metric``, ``input_column``,
            ``label_column`` and ``label_mapping``.
    """

    model_or_pipeline: Optional[
        Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"]
    ] = None
    data: Optional[Union[str, Dataset]] = None
    subset: Optional[str] = None
    split: Optional[str] = None
    # The default must be a bare ``None``: a trailing comma here (``= None,``)
    # would turn the default into the one-element tuple ``(None,)``, which is
    # truthy, is not ``None`` and is not callable.
    data_preprocessor: Optional[Callable] = None
    args_for_task: Optional[dict] = None


def preprocessor(x):
    """Return the lower-cased value stored under the ``"text"`` key of ``x``."""
    # NOTE(review): this reads the "text" key, while the sst2 entry in
    # ``suite`` declares "sentence" as its input column -- confirm that the
    # consumer only ever passes records that contain "text".
    return x["text"].lower()


# Tasks of the suite. Both entries score accuracy on a test split and map the
# "LABEL_0"/"LABEL_1" outputs to 0.0/1.0; the second one evaluates only the
# "test[:10]" slice.
suite = [
    SubTask(
        data="imdb",
        split="test",
        data_preprocessor=preprocessor,
        args_for_task={
            "metric": "accuracy",
            "input_column": "text",
            "label_column": "label",
            "label_mapping": {
                "LABEL_0": 0.0,
                "LABEL_1": 1.0,
            },
        },
    ),
    SubTask(
        data="sst2",
        split="test[:10]",
        data_preprocessor=preprocessor,
        args_for_task={
            "metric": "accuracy",
            "input_column": "sentence",
            "label_column": "label",
            "label_mapping": {
                "LABEL_0": 0.0,
                "LABEL_1": 1.0,
            },
        },
    ),
]