joey00072's picture
Push model using huggingface_hub.
28a9f48 verified
{
"bias": false,
"capacity_factor": 0.12,
"d_model": 1024,
"dropout": 0.2,
"ffn": "swiglu",
"hidden_dim": 4096,
"mixture_of_depth": true,
"mixture_of_expert": false,
"model_type": {
"mixture_of_depth": true,
"name": "mixture of depth"
},
"moe_num_experts": 4,
"moe_num_experts_per_tok": 2,
"multiple_of": 4,
"num_heads": 16,
"num_kv_heads": 0,
"num_layers": 16,
"seq_len": 512,
"vocab_size": 50257,
"weight_tying": true,
"window_size": 128
}