import torch
import torch.nn as nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset

# Load the GPT-2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.train()

# Define the loss function and optimizer (5e-5 is a typical fine-tuning learning rate)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

# Get the training dataset from Hugging Face
dataset = load_dataset("wikitext", "wikitext-103-v1")
train_data = dataset["train"]

# Define the number of training steps
num_steps = 1000

# Training loop (one text example per step)
for step in range(num_steps):
    # Get the next example; skip the empty lines that appear in the corpus
    text = train_data[step]["text"]
    if not text.strip():
        continue

    # Tokenize; return_tensors='pt' already adds the batch dimension,
    # and truncation keeps the sequence within GPT-2's 1024-token context
    input_ids = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024)

    # Forward pass
    outputs = model(input_ids)
    logits = outputs.logits

    # Compute the loss: shift so that each position predicts the next token
    shift_logits = logits[:, :-1, :].contiguous()
    shift_labels = input_ids[:, 1:].contiguous()
    loss = criterion(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the current loss
    if step % 100 == 0:
        print(f"Step {step}, Loss {loss.item()}")

# Save the fine-tuned model
torch.save(model.state_dict(), "fine_tuned_gpt2.pth")
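
# --- Reloading the fine-tuned weights for inference (a minimal sketch) ---
# This assumes the 'fine_tuned_gpt2.pth' state dict saved above; the prompt
# string below is purely illustrative.
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.load_state_dict(torch.load("fine_tuned_gpt2.pth"))
model.eval()

prompt_ids = tokenizer.encode("The history of", return_tensors="pt")
with torch.no_grad():
    generated = model.generate(prompt_ids, max_length=50)
print(tokenizer.decode(generated[0], skip_special_tokens=True))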