|
import evaluate |
|
from evaluate.evaluation_suite import SubTask |
|
|
|
|
|
class Suite(evaluate.EvaluationSuite):
    """Small GLUE evaluation suite made of eight text-classification subtasks.

    Each subtask loads a 10-example slice of one ``glue`` subset, is scored
    with the ``accuracy`` metric, and carries a ``label_mapping`` that
    translates the ``LABEL_<i>`` names into numeric class ids.
    """

    def __init__(self, name):
        """Build the list of GLUE subtasks.

        Args:
            name: Suite name, forwarded unchanged to
                ``evaluate.EvaluationSuite.__init__``.
        """
        super().__init__(name)

        # Stored on the instance for callers that read it; none of the
        # subtasks below is given a data preprocessor.
        self.preprocessor = lambda x: {"text": x["text"].lower()}

        binary_float = {"LABEL_0": 0.0, "LABEL_1": 1.0}
        binary_int = {"LABEL_0": 0, "LABEL_1": 1}
        ternary_int = {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}

        # (subset, split, input columns, label mapping)
        task_specs = [
            # GLUE publishes its test splits without gold labels (label == -1),
            # so accuracy on "test[:10]" can never be meaningful; CoLA is scored
            # on the validation split like every other task in this suite.
            ("cola", "validation[:10]", ("sentence",), binary_float),
            ("sst2", "validation[:10]", ("sentence",), binary_float),
            ("qqp", "validation[:10]", ("question1", "question2"), binary_int),
            ("mrpc", "validation[:10]", ("sentence1", "sentence2"), binary_int),
            (
                "mnli",
                "validation_mismatched[:10]",
                ("premise", "hypothesis"),
                ternary_int,
            ),
            ("qnli", "validation[:10]", ("question", "sentence"), binary_int),
            ("rte", "validation[:10]", ("sentence1", "sentence2"), binary_int),
            ("wnli", "validation[:10]", ("sentence1", "sentence2"), binary_int),
        ]

        self.suite = [
            self._glue_subtask(subset, split, columns, mapping)
            for subset, split, columns, mapping in task_specs
        ]

    @staticmethod
    def _glue_subtask(subset, split, columns, label_mapping):
        """Create one accuracy-scored text-classification ``SubTask`` on GLUE.

        Args:
            subset: GLUE subset name, e.g. ``"sst2"``.
            split: Split expression, e.g. ``"validation[:10]"``.
            columns: One or two input column names; a second name is passed
                as ``second_input_column`` for sentence-pair tasks.
            label_mapping: Mapping from ``LABEL_<i>`` names to numeric ids.

        Returns:
            The configured ``SubTask``.
        """
        task_args = {"metric": "accuracy", "input_column": columns[0]}
        if len(columns) > 1:
            task_args["second_input_column"] = columns[1]
        # The label column is spelled out for every task so all subtasks
        # share one argument shape.
        task_args["label_column"] = "label"
        # Copy the mapping so no two subtasks share a mutable dict.
        task_args["label_mapping"] = dict(label_mapping)

        return SubTask(
            task_type="text-classification",
            data="glue",
            subset=subset,
            split=split,
            args_for_task=task_args,
        )
|
|