{ "d_model": 512, "in_model": 1280, "n_heads": 8, "intermediate_size": 1280, "n_layers": 8, "norm_eps": 1e-12, "expr_bin": 32, "initializer_range": 0.02, "attention_out_bias": true, "norm_type": "layer_norm", "attention_dropout": 0.0, "mlp_dropout": 0.0 }