# @package _global_

# gpt2 inspired, even bigger (~3.3B params)
transformer_lm:
  dim: 2048
  num_heads: 32
  num_layers: 48
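
# Rough scale check (a sketch, assuming a standard decoder block with a 4x
# feed-forward expansion, i.e. ~12 * dim^2 params per layer):
#   12 * 2048^2 * 48 layers ≈ 2.4B
# with the remainder of the ~3.3B total presumably coming from embeddings and
# any modules outside this decoder stack.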