{ "activation": "silu", "bias": false, "ckpt_iter": 10, "d_model": 128, "dropout": 0.2, "hidden_dim": 128, "mlp": "GLU", "num_heads": 4, "num_kv_heads": 0, "num_layers": 4, "seq_len": 10, "vocab_size": 10, "weight_tying": false }