from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the GPT2 tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Load the training data
with open('train.txt', 'r') as f:
    text = f.read()

# Tokenize the training data (truncate to GPT-2's 1024-token context window)
input_ids = tokenizer.encode(text, return_tensors='pt', truncation=True)

# Train the model: 100 optimization steps over the single tokenized batch
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
model.train()
for i in range(100):
    # Using the inputs as labels gives the causal language-modeling loss
    # (the model shifts the labels internally)
    outputs = model(input_ids, labels=input_ids)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    print(f'Step {i+1}, Loss: {loss.item()}')

# Save the fine-tuned model and its tokenizer so the directory can be
# reloaded later with from_pretrained
model.save_pretrained('my_gpt_model')
tokenizer.save_pretrained('my_gpt_model')
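
# --- Usage sketch (not part of the original script) ---
# A minimal example of how the saved model could be reloaded and sampled from.
# The prompt text and generation settings (max_new_tokens, top_p) are
# illustrative assumptions, not values taken from the original code.
finetuned_tokenizer = GPT2Tokenizer.from_pretrained('my_gpt_model')
finetuned_model = GPT2LMHeadModel.from_pretrained('my_gpt_model')
finetuned_model.eval()

prompt_ids = finetuned_tokenizer.encode('Once upon a time', return_tensors='pt')
with torch.no_grad():
    generated = finetuned_model.generate(
        prompt_ids,
        max_new_tokens=50,
        do_sample=True,
        top_p=0.9,
        pad_token_id=finetuned_tokenizer.eos_token_id,  # GPT-2 has no pad token
    )
print(finetuned_tokenizer.decode(generated[0], skip_special_tokens=True))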