import torch
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

# Load the pre-trained GPT-Neo 1.3B model and its tokenizer
model = GPTNeoForCausalLM.from_pretrained('EleutherAI/gpt-neo-1.3B')
tokenizer = GPT2Tokenizer.from_pretrained('EleutherAI/gpt-neo-1.3B')

# Read the raw training text from disk
with open('text.txt', 'r') as f:
    text = f.read()

# Tokenize the full text into a single tensor of input IDs
encoding = tokenizer.encode(text, return_tensors='pt')

# Fine-tune: for causal language modelling, the input IDs also serve as the labels
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

for i in range(100):
    outputs = model(encoding, labels=encoding)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f'Epoch {i+1}, Loss: {loss.item()}')

# Save the fine-tuned model and tokenizer
model.save_pretrained('fine-tuned-gpt-neo')
tokenizer.save_pretrained('fine-tuned-gpt-neo')
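
# A minimal sanity check, assuming the 'fine-tuned-gpt-neo' directory saved above:
# reload the fine-tuned weights and sample a short continuation. The prompt text
# is only an illustrative example, not part of the training setup.
model = GPTNeoForCausalLM.from_pretrained('fine-tuned-gpt-neo')
tokenizer = GPT2Tokenizer.from_pretrained('fine-tuned-gpt-neo')

prompt_ids = tokenizer.encode('Once upon a time', return_tensors='pt')
generated = model.generate(prompt_ids, max_length=50, do_sample=True)
print(tokenizer.decode(generated[0], skip_special_tokens=True))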