{"config": {"sample_key": "input_ids", "prediction_key": "logits", "block_size": 1024, "vocab_size": 50304, "n_layer": 4, "n_head": 16, "n_embd": 1024, "ffn_hidden": 4096, "dropout": 0.01, "bias": true, "attention": {"attention_type": "default_attention", "scaling_factor": 3}, "activation": "gelu", "epsilon": 1e-05, "weight_init": {"mean": 0.0, "std": 0.02}}, "model_type": "llm_gym_gpt2"}