import pandas as pd
import torch  # BUG FIX: torch.tensor is used in __getitem__ but torch was never imported
from transformers import AutoTokenizer


class MyDataset(torch.utils.data.Dataset):
    """Map-style dataset over a CSV file for joint agent/action classification.

    Each CSV row is expected to hold (by position, not by column name):
      column 0 — the input text,
      column 1 — the agent label,
      column 2 — the action label.
    NOTE(review): labels are passed straight to ``torch.tensor``, so they are
    presumably numeric class ids already — confirm against the data file.

    Args:
        data_file: Path to the CSV file to load with ``pandas.read_csv``.
        tokenizer: A HuggingFace-style tokenizer exposing ``encode_plus``.
    """

    def __init__(self, data_file, tokenizer):
        self.data = pd.read_csv(data_file)
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of rows (examples) in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize row ``idx`` and return model-ready tensors.

        Returns:
            dict with:
              ``input_ids``      — LongTensor of shape (512,), padded/truncated,
              ``attention_mask`` — LongTensor of shape (512,),
              ``labels_agents``  — tensor built from column 1,
              ``labels_actions`` — tensor built from column 2.
        """
        text = self.data.iloc[idx, 0]
        agents = self.data.iloc[idx, 1]
        actions = self.data.iloc[idx, 2]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=512,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        # encode_plus returns tensors of shape (1, 512); flatten drops the
        # batch dimension so DataLoader can collate examples itself.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels_agents': torch.tensor(agents),
            'labels_actions': torch.tensor(actions),
        }