agentic-Transformer / Dataset.py
dnnsdunca's picture
Create Dataset.py
a5dd61d verified
raw
history blame contribute delete
902 Bytes
import pandas as pd
import torch
from transformers import AutoTokenizer
class MyDataset:
    """Indexable dataset that tokenizes CSV rows on access.

    Each row of the CSV is read positionally: column 0 is the input text,
    column 1 is the "agents" label and column 2 is the "actions" label.
    The class implements ``__len__`` and ``__getitem__``, so it can be
    indexed like a map-style dataset.

    Args:
        data_file: Path or file-like object handed straight to
            ``pandas.read_csv``.
        tokenizer: Object exposing ``encode_plus(text, max_length=...,
            padding=..., truncation=..., return_attention_mask=...,
            return_tensors=...)`` and returning a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Length every encoded sequence is padded or truncated
            to. Defaults to 512, the value that used to be hard-coded.
    """

    def __init__(self, data_file, tokenizer, max_length: int = 512):
        self.data = pd.read_csv(data_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self) -> int:
        """Return the number of rows loaded from the CSV."""
        return len(self.data)

    def __getitem__(self, idx) -> dict:
        """Tokenize row ``idx`` and return it with its two labels.

        Returns:
            A dict with keys ``'input_ids'`` and ``'attention_mask'``
            (the tokenizer outputs, flattened to 1-D) and
            ``'labels_agents'`` / ``'labels_actions'`` (the values of
            columns 1 and 2 wrapped in tensors).
        """
        row = self.data.iloc[idx]
        text = row.iloc[0]
        # NOTE(review): labels are assumed to be numeric; torch.tensor()
        # raises on string values -- confirm against the CSV schema.
        agents = row.iloc[1]
        actions = row.iloc[2]

        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            # return_tensors='pt' is requested for a single text; flatten
            # collapses the result to 1-D so each sample has no batch axis.
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels_agents': torch.tensor(agents),
            'labels_actions': torch.tensor(actions),
        }