---
# Training configuration — character-level GPT.
# NOTE(review): field meanings below are inferred from conventional names
# (attn_pdrop, d_model, heads, ...) — confirm against the consuming trainer.

# Model architecture.
model_type: gpt
blocks: 6                       # transformer blocks
d_model: 128                    # hidden / embedding dimension
heads: 4                        # attention heads per block
window: 32                      # context window (sequence length)
vocab: 27                       # 26 letters + '.'

# Dropout probabilities.
attn_pdrop: 0.1
embd_pdrop: 0.1
resid_pdrop: 0.1

# Optimizer (Adam-style) hyperparameters.
lr: 0.0003
b1: 0.9                         # beta1
b2: 0.95                        # beta2
weight_decay: 0.1

# Training loop / data loading.
batch_size: 4096
epochs: 50000
num_workers: 4
num_final_chars_in_dataset: 2   # presumably trailing chars held out — verify against dataset code

# Character -> index map. All keys are quoted so single characters such as
# 'n' and 'y' are never mis-parsed as YAML 1.1 booleans by strict parsers.
stoi:
  '.': 0
  'a': 1
  'b': 2
  'c': 3
  'd': 4
  'e': 5
  'f': 6
  'g': 7
  'h': 8
  'i': 9
  'j': 10
  'k': 11
  'l': 12
  'm': 13
  'n': 14
  'o': 15
  'p': 16
  'q': 17
  'r': 18
  's': 19
  't': 20
  'u': 21
  'v': 22
  'w': 23
  'x': 24
  'y': 25
  'z': 26