act_fn: relu d_head: 3 d_model: 12 d_vocab: 4 n_ctx: 15 n_layers: 1 seed: 42