attn_pdrop: 0.1
b1: 0.9
b2: 0.95
batch_size: 4096
blocks: 6
d_model: 128
embd_pdrop: 0.1
epochs: 50000
heads: 4
lr: 0.0003
model_type: gpt
num_final_chars_in_dataset: 2
num_workers: 4
resid_pdrop: 0.1
stoi:
  ' ': 1
  "'": 2
  '-': 3
  '.': 0
  a: 4
  b: 5
  c: 6
  d: 7
  e: 8
  f: 9
  g: 10
  h: 11
  i: 12
  j: 13
  k: 14
  l: 15
  m: 16
  n: 17
  o: 18
  p: 19
  q: 20
  r: 21
  s: 22
  t: 23
  u: 24
  v: 25
  w: 26
  x: 27
  y: 28
  z: 29
vocab: 30
weight_decay: 0.1
window: 32
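
For context, the sketch below shows one way a training script might consume this file: load it with PyYAML, invert stoi into an id-to-character table, and map b1/b2 onto Adam-style optimizer betas. The filename "hparams.yaml", the encode/decode helpers, and the AdamW mapping are assumptions for illustration; only the keys and values come from the config above.

import yaml

# A minimal sketch, assuming the config is saved as "hparams.yaml"
# (the filename is illustrative) and PyYAML is installed.
with open("hparams.yaml") as f:
    cfg = yaml.safe_load(f)

# stoi maps each of the 30 vocabulary characters to a token id;
# invert it to turn sampled ids back into text.
stoi = cfg["stoi"]
itos = {i: ch for ch, i in stoi.items()}
assert len(stoi) == cfg["vocab"] == 30

def encode(s: str) -> list[int]:
    """Text -> token ids, e.g. encode("ada") == [4, 7, 4]."""
    return [stoi[ch] for ch in s]

def decode(ids: list[int]) -> str:
    """Token ids -> text; inverse of encode."""
    return "".join(itos[i] for i in ids)

# b1/b2 read naturally as Adam beta coefficients (an assumption, though
# 0.9/0.95 is the conventional pair for GPT training), so the optimizer
# hyperparameters would wire up roughly as:
#   torch.optim.AdamW(model.parameters(), lr=cfg["lr"],
#                     betas=(cfg["b1"], cfg["b2"]),
#                     weight_decay=cfg["weight_decay"])

Read together, blocks, heads, d_model, and window presumably describe a small 6-layer, 4-head, 128-dimensional transformer with a 32-character context window, which fits the 30-character vocabulary of a character-level model.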