from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
# Load the GPT2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
# Load the training data
with open('train.txt', 'r') as f:
    text = f.read()
# Tokenize the training data
# (truncated to GPT-2's 1024-token context window so a long train.txt does not overflow the position embeddings)
input_ids = tokenizer.encode(text, return_tensors='pt', max_length=1024, truncation=True)
# Train the model
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
model.train()
for i in range(100):
    outputs = model(input_ids, labels=input_ids)
    loss = outputs[0]
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f'Epoch {i+1}, Loss: {loss.item()}')
# Save the fine-tuned model and tokenizer so 'my_gpt_model' can be reloaded with from_pretrained
model.save_pretrained('my_gpt_model')
tokenizer.save_pretrained('my_gpt_model')
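
# A minimal usage sketch (an assumption, not part of the original script):
# reload the fine-tuned weights and sample a short continuation.
# The prompt and generation settings below are illustrative placeholders.
model = GPT2LMHeadModel.from_pretrained('my_gpt_model')
tokenizer = GPT2Tokenizer.from_pretrained('my_gpt_model')
model.eval()
prompt_ids = tokenizer.encode('Once upon a time', return_tensors='pt')
sample = model.generate(
    prompt_ids,
    max_length=50,
    do_sample=True,
    top_k=50,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token; reuse EOS
)
print(tokenizer.decode(sample[0], skip_special_tokens=True))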