Spaces:
Sleeping
Sleeping

Update content with the text model from Thomas's repository: https://huggingface.co/spaces/tombou/frugal-ai-challenge
42b7ac6
from abc import ABC, abstractmethod | |
from datasets import load_dataset, DatasetDict | |
from tasks.utils.evaluation import TextEvaluationRequest | |
class DataLoader(ABC):
    """Common interface for objects that provide a train and a test dataset.

    The methods here are placeholders that return ``None``; they are not
    enforced as abstract, so subclasses are expected (but not required) to
    override them.
    """

    def get_train_dataset(self):
        """Return the training dataset; this base version returns ``None``."""
        return None

    def get_test_dataset(self):
        """Return the test dataset; this base version returns ``None``."""
        return None
class TextDataLoader(DataLoader):
    """Load a labelled text dataset and expose it as train/test splits.

    The dataset named by the request is loaded with ``load_dataset``, its
    string labels are converted to integer ids via ``label_mapping``, and its
    ``"train"`` split is divided into ``"train"`` and ``"test"`` parts using
    the request's ``test_size`` and ``test_seed``.
    """

    # Maximum number of rows kept per split when ``light=True``.
    _LIGHT_TRAIN_SIZE = 10
    _LIGHT_TEST_SIZE = 2

    def __init__(self, request: "TextEvaluationRequest | None" = None, light: bool = False):
        """Load the dataset and prepare the train/test splits.

        Args:
            request: Supplies ``dataset_name``, ``test_size`` and
                ``test_seed``. When omitted, a fresh
                ``TextEvaluationRequest()`` is created.
            light: When true, keep only a small shuffled subset of each split
                (at most 10 training rows and 2 test rows) for quick testing.

        Raises:
            KeyError: If a row's ``"label"`` is not a key of
                ``label_mapping``.
        """
        # Build the default request per call: a default written in the
        # signature is evaluated once, at definition time, and that single
        # instance would be shared by every loader created without a request.
        if request is None:
            request = TextEvaluationRequest()

        self.label_mapping = {
            "0_not_relevant": 0,
            "1_not_happening": 1,
            "2_not_human": 2,
            "3_not_bad": 3,
            "4_solutions_harmful_unnecessary": 4,
            "5_science_unreliable": 5,
            "6_proponents_biased": 6,
            "7_fossil_fuels_needed": 7,
        }

        # Only the "train" split is used below, so convert string labels to
        # integers on that split alone instead of on every split.
        train_split = load_dataset(request.dataset_name)["train"]
        train_split = train_split.map(
            lambda row: {"label": self.label_mapping[row["label"]]}
        )
        self.dataset = train_split.train_test_split(
            test_size=request.test_size,
            seed=request.test_seed,
        )

        # Create a smaller version of the dataset for quick testing.
        if light:
            self.dataset = DatasetDict({
                "train": self._shuffled_head(self.dataset["train"], self._LIGHT_TRAIN_SIZE),
                "test": self._shuffled_head(self.dataset["test"], self._LIGHT_TEST_SIZE),
            })

    @staticmethod
    def _shuffled_head(split, limit):
        """Shuffle ``split`` with a fixed seed and keep at most ``limit`` rows."""
        # Clamp to the split length so a split smaller than ``limit`` does not
        # make ``select`` fail on out-of-range indices.
        return split.shuffle(seed=42).select(range(min(limit, len(split))))

    def get_train_dataset(self):
        """Return the ``"train"`` split prepared in ``__init__``."""
        return self.dataset["train"]

    def get_test_dataset(self):
        """Return the ``"test"`` split prepared in ``__init__``."""
        return self.dataset["test"]

    def get_label_to_id_mapping(self):
        """Return the label-name -> integer-id dict (the instance's own dict, not a copy)."""
        return self.label_mapping

    def get_id_to_label_mapping(self):
        """Return a new dict mapping each integer id back to its label name."""
        return {label_id: label for label, label_id in self.label_mapping.items()}