# Spaces: Sleeping / Sleeping
# NOTE(review): the three lines above were a Hugging Face Space status banner
# scraped into the source; commented out so the module stays valid Python.
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer

from evo_model import EvoTransformer
class FeedbackDataset(Dataset):
    """Dataset over a feedback-log CSV for fine-tuning.

    Each row must provide 'prompt', 'context', and 'label' columns. The
    prompt and context are joined with a single space, tokenized to a fixed
    length, and returned with the integer label as the target.

    Args:
        csv_file: Path to the feedback CSV log.
        tokenizer_name: Hugging Face tokenizer to load (default preserves the
            previously hard-coded "bert-base-uncased").
        max_length: Fixed token length; inputs are padded/truncated to this
            (default preserves the previously hard-coded 128).
    """

    def __init__(self, csv_file, tokenizer_name="bert-base-uncased", max_length=128):
        # Drop rows with any missing value so __getitem__ never sees NaN.
        self.data = pd.read_csv(csv_file).dropna()
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.max_length = max_length

    def __len__(self):
        # Number of usable (non-NaN) feedback rows.
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        # Model input is prompt and context joined by a single space.
        text = f"{row['prompt']} {row['context']}"
        label = int(row['label'])
        encoded = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors="pt",
        )
        # The tokenizer returns a (1, max_length) batch; squeeze to
        # (max_length,) so DataLoader can stack items into batches itself.
        return encoded['input_ids'].squeeze(0), torch.tensor(label)
def fine_tune_on_feedback(csv_file="feedback_log.csv",
                          checkpoint="evo_hellaswag.pt",
                          epochs=2,
                          batch_size=8,
                          lr=2e-5):
    """Fine-tune the saved EvoTransformer on logged feedback and re-save it.

    Loads the checkpoint, runs a short supervised pass over the feedback CSV,
    and writes the updated weights back to the same checkpoint file.

    Args:
        csv_file: Feedback log with 'prompt', 'context', 'label' columns.
        checkpoint: Path of the weights file to load from and save to.
        epochs: Number of passes over the feedback data.
        batch_size: DataLoader batch size.
        lr: Adam learning rate.

    All defaults match the previously hard-coded values, so calling with no
    arguments behaves as before.
    """
    # Guard: without a log there is nothing to train on — fail gracefully
    # instead of raising inside pd.read_csv.
    if not os.path.exists(csv_file):
        print(f"⚠️ No feedback log found at {csv_file}; skipping retraining.")
        return

    dataset = FeedbackDataset(csv_file)
    # Guard: an empty dataset would silently re-save the unchanged checkpoint.
    if len(dataset) == 0:
        print("⚠️ Feedback log is empty; skipping retraining.")
        return

    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = EvoTransformer().to(device)
    # Resume from the saved checkpoint so feedback refines — not replaces —
    # the prior training.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        total_loss = 0.0
        for input_ids, labels in dataloader:
            input_ids = input_ids.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(input_ids)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Report progress so a stalled/diverging run is visible in the logs.
        print(f"Epoch {epoch + 1}/{epochs} - avg loss: {total_loss / len(dataloader):.4f}")

    torch.save(model.state_dict(), checkpoint)
    print("✅ Evo retrained and saved.")