PyTorch
llama
alignment-handbook
Generated from Trainer
Mamba2InLlama_0_75 / mamba_config.json
Junxiong Wang
add models
2804acb
raw
history blame contribute delete
402 Bytes
{
"d_model": 4096,
"ssm_cfg": {
"expand": 1,
"ngroups": 32,
"d_state": 128
},
"rms_norm_eps": 1e-05,
"vocab_size": null,
"d_inner": 4096,
"d_xb": 1024,
"intermediate_size": 14336,
"hidden_act": "silu",
"n_layer": 32,
"attn_layers": [
3,
7,
11,
15,
19,
23,
27,
31
]
}