Phi-Mamba / config.json
{
    "LanguageModel": {
        "input": {
            "vocab_size": 51200,
            "pad_vocab_size_multiple": 8
        }
    },
    "MixerModel": {
        "input": {
            "d_model": 2048,
            "n_layer": 24,
            "lm_head_prenorm": "layer"
        }
    },
    "Block1": {
        "n_layers": 24,
        "BlockType": "modules.phi_block",
        "block_input": {
            "resid_dropout": 0.0
        },
        "CoreType": "modules.mixers.discrete_mamba2",
        "core_input": {
            "d_state": 64,
            "n_v_heads": 32,
            "n_qk_heads": 32,
            "d_conv": 4,
            "conv_bias": true,
            "expand": 1,
            "chunk_size": 128,
            "activation": "identity",
            "bias": false
        }
    }
}
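
As a rough illustration of how this config is consumed, the sketch below loads the file with Python's standard json module and derives two quantities implied by the values above. The field names are taken verbatim from this config; the file path and the derived arithmetic are illustrative assumptions, not the Phi-Mamba loader itself, which instantiates the classes referenced by "BlockType" and "CoreType".

    # Minimal sketch, assuming the file sits at ./config.json.
    # Only the standard library is used; the module paths in the config
    # ("modules.phi_block", "modules.mixers.discrete_mamba2") belong to
    # the Phi-Mamba codebase and are not imported here.
    import json

    with open("config.json") as f:
        cfg = json.load(f)

    lm = cfg["LanguageModel"]["input"]
    mixer = cfg["MixerModel"]["input"]
    core = cfg["Block1"]["core_input"]

    # The vocabulary is padded up to a multiple of 8 for efficient
    # embedding / LM-head shapes (ceiling division, then re-multiply).
    mult = lm["pad_vocab_size_multiple"]
    padded_vocab = -(-lm["vocab_size"] // mult) * mult

    # With expand = 1 the mixer's inner dimension equals d_model, so
    # each of the 32 value heads is d_model * expand / n_v_heads wide.
    d_inner = mixer["d_model"] * core["expand"]
    head_dim = d_inner // core["n_v_heads"]

    print(padded_vocab)  # 51200 (already a multiple of 8)
    print(head_dim)      # 64, matching d_state

Note that "n_layers" in "Block1" matches "n_layer" in "MixerModel": all 24 layers use the same Phi-style block with a discrete Mamba-2 mixer as its core.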