{
  "LanguageModel": {
    "input": {
      "vocab_size": 51200,
      "pad_vocab_size_multiple": 8
    }
  },
  "MixerModel": {
    "input": {
      "d_model": 2048,
      "n_layer": 24,
      "lm_head_prenorm": "layer"
    }
  },
  "Block1": {
    "n_layers": 24,
    "BlockType": "modules.phi_block",
    "block_input": {
      "resid_dropout": 0.0
    },
    "CoreType": "modules.mixers.discrete_mamba2",
    "core_input": {
      "d_state": 64,
      "n_v_heads": 32,
      "n_qk_heads": 32,
      "d_conv": 4,
      "conv_bias": true,
      "expand": 1,
      "chunk_size": 128,
      "activation": "identity",
      "bias": false
    }
  }
}
|
|