import torch
import torch.nn as nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset
from torch.utils.data import DataLoader

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# GPT-2 has no padding token by default; reuse EOS so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
model.train()

# The default ignore_index of -100 lets padded label positions be masked out below.
criterion = nn.CrossEntropyLoss()
# A small learning rate is the usual choice when fine-tuning a pretrained model.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

# Returns a DatasetDict with "train", "validation" and "test" splits of raw text.
dataset = load_dataset("wikitext", "wikitext-103-v1")

num_steps = 1000

# Wrap the training split (not the whole DatasetDict) in a DataLoader.
# The default collate function yields {"text": [list of strings]} per batch.
data_loader = DataLoader(dataset["train"], batch_size=32, shuffle=True)

for step, batch in enumerate(data_loader):
    if step >= num_steps:
        break

    # WikiText contains many empty lines; drop them from the batch.
    texts = [t for t in batch["text"] if t.strip()]
    if not texts:
        continue

    # The tokenizer already returns (batch, seq_len) tensors, so no extra
    # unsqueeze is needed. Pad and truncate so the batch stacks cleanly.
    encodings = tokenizer(texts, return_tensors="pt", padding=True,
                          truncation=True, max_length=512)
    input_ids = encodings["input_ids"].to(device)
    attention_mask = encodings["attention_mask"].to(device)

    # For causal LM fine-tuning the labels are the input ids themselves;
    # set padded positions to -100 so the loss ignores them.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    outputs = model(input_ids, attention_mask=attention_mask)
    logits = outputs.logits

    # Shift so that each position is trained to predict the next token.
    shift_logits = logits[:, :-1, :].contiguous()
    shift_labels = labels[:, 1:].contiguous()
    loss = criterion(shift_logits.view(-1, shift_logits.size(-1)),
                     shift_labels.view(-1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 100 == 0:
        print(f'Step {step}, Loss {loss.item()}')

torch.save(model.state_dict(), 'fine_tuned_gpt2.pth')
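
# A minimal follow-up sketch (not part of the original script): reload the saved
# weights for inference. The checkpoint path matches the save call above; the
# prompt string and generation settings are illustrative assumptions.
eval_model = GPT2LMHeadModel.from_pretrained("gpt2")
eval_model.load_state_dict(torch.load('fine_tuned_gpt2.pth', map_location='cpu'))
eval_model.eval()

prompt_ids = tokenizer("The history of", return_tensors="pt")["input_ids"]
with torch.no_grad():
    generated = eval_model.generate(prompt_ids, max_new_tokens=20,
                                    pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(generated[0], skip_special_tokens=True))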