act_fn: relu d_head: 5 d_model: 20 d_vocab: 5 n_ctx: 16 n_layers: 3 seed: null