# File size: 247 Bytes
# d7696be
import torch
# hyperparameters
# Module-level configuration constants. The code that consumes them is not
# visible from this file, so the per-name descriptions below are inferred from
# the names alone -- NOTE(review): confirm against the training/model code.
batch_size = 64  # presumably the number of sequences per batch
block_size = 256  # presumably the context length per sequence
max_iters = 5000  # presumably the total number of training iterations
eval_interval = 500  # presumably iterations between evaluations
learning_rate = 3e-4  # presumably the optimizer step size
# Select the GPU when torch reports CUDA as available; otherwise use the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200  # presumably batches averaged per evaluation
n_embd = 384  # presumably the embedding width (384 / n_head = 64)
n_head = 6  # presumably the number of attention heads
n_layer = 6  # presumably the number of stacked layers
dropout = 0.2  # presumably the dropout probability