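"""Fine-tune GPT-2 on WikiText-103 with a plain PyTorch training loop."""
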
import torch
import torch.nn as nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset
# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Train on GPU when available, and make sure the model is in training mode
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()
# Define the loss function and optimizer (a small learning rate is typical for fine-tuning)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
# Get the training dataset from Hugging Face
dataset = load_dataset("wikitext", "wikitext-103-v1")
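# load_dataset returns a DatasetDict with "train", "validation", and "test"
# splits; each row is a dict with a single "text" field.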
# Define the number of training steps
num_steps = 1000
# Training loop: one raw text sample per step
for step in range(num_steps):
    # Get the next text sample; skip the empty lines that are common in WikiText
    text = dataset["train"][step]["text"]
    if not text.strip():
        continue
    # Tokenize; encode() with return_tensors='pt' already returns a (1, seq_len)
    # batch, so no extra unsqueeze is needed. Truncate to GPT-2's context size.
    input_ids = tokenizer.encode(
        text, return_tensors="pt", truncation=True, max_length=1024
    ).to(device)
    if input_ids.size(1) < 2:
        continue  # need at least two tokens to form a (context, next-token) pair
    # For causal LM training the labels are the input tokens themselves
    labels = input_ids.clone()
    # Forward pass
    outputs = model(input_ids)
    logits = outputs.logits
    # Compute the loss, shifting so that tokens before position n predict token n
    shift_logits = logits[:, :-1, :]
    shift_labels = labels[:, 1:]
    loss = criterion(
        shift_logits.reshape(-1, shift_logits.size(-1)), shift_labels.reshape(-1)
    )
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Print the current loss
    if step % 100 == 0:
        print(f"Step {step}, Loss {loss.item()}")
# Save the fine-tuned model
torch.save(model.state_dict(), "fine_tuned_gpt2.pth")
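
# A minimal sketch of reloading the saved weights and sampling from the
# fine-tuned model. The prompt and the generation settings (max_length,
# do_sample) are illustrative assumptions, not part of the training run above.
loaded_model = GPT2LMHeadModel.from_pretrained("gpt2")
loaded_model.load_state_dict(torch.load("fine_tuned_gpt2.pth", map_location=device))
loaded_model.to(device)
loaded_model.eval()

prompt_ids = tokenizer.encode("The history of", return_tensors="pt").to(device)
with torch.no_grad():
    generated = loaded_model.generate(
        prompt_ids,
        max_length=50,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
print(tokenizer.decode(generated[0], skip_special_tokens=True))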