```python
import evaluate
from evaluate.evaluation_suite import SubTask

# This is odd because the first dataset is multi-class and
# the second dataset is binary. The model I'm using has 4 labels
# and is fine-tuned on the first dataset.
# So what does it mean for this model to be evaluated on the second
# dataset?
metric = evaluate.combine(["accuracy"])


class Suite(evaluate.EvaluationSuite):
    def __init__(self, name):
        super().__init__(name)
        self.preprocessor = lambda x: {"text": x["text"].lower()}
        self.suite = [
            SubTask(
                task_type="text-classification",
                data="hate_speech18",
                split="train[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0,
                        "RELATION": 1.0,
                        "IDK": 1.0,
                    },
                },
            ),
            SubTask(
                task_type="text-classification",
                data="mteb/toxic_conversations_50k",
                split="test[:1000]",
                args_for_task={
                    "metric": metric,
                    "input_column": "text",
                    "label_column": "label",
                    "label_mapping": {
                        "NO_HATE": 0.0,
                        "HATE": 1.0,
                    },
                },
            ),
        ]
```
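For context, here is a minimal sketch of how a suite like this gets run. The suite name and model checkpoint below are placeholders I made up, not part of the original setup:

```python
# Hypothetical usage sketch: instantiate the suite and run it against a
# text-classification model. "my-hate-speech-model" stands in for the
# 4-label model fine-tuned on hate_speech18.
suite = Suite("hate-speech-suite")
results = suite.run("my-hate-speech-model")

# run() returns one result dict per SubTask, so the second entry is where
# the 4-label model gets scored against the binary toxic_conversations labels.
for task_result in results:
    print(task_result)
```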