# Configuration mapping, one key per line so the file parses as a YAML block
# mapping (several "key: value" pairs on a single line are not valid YAML).
# NOTE(review): key meanings are not defined in this file; the names suggest
# model hyperparameters — confirm against the code that loads this config.
act_fn: relu
d_head: 8
d_model: 32
d_vocab: 5
n_ctx: 16
n_layers: 3
# null means no seed value is specified here; how the consumer treats a
# missing seed is not visible from this file.
seed: null