{
  "alpha": 0,
  "architectures": [
    "GPT"
  ],
  "auto_map": {
    "AutoConfig": "adapter_v2.ConfigSMOE",
    "AutoModelForCausalLM": "adapter_v2.GPT"
  },
  "bias": true,
  "block_size": 2048,
  "gelu_approximate": "tanh",
  "head_size": 64,
  "hf_config": {
    "name": "phi-1_5",
    "org": "microsoft"
  },
  "intermediate_size": 8192,
  "lm_head_bias": true,
  "mlp_class_name": "GptNeoxMLP",
  "model_type": "gpt",
  "n_embd": 2048,
  "n_expert": 0,
  "n_expert_per_token": 0,
  "n_head": 32,
  "n_layer": 24,
  "n_query_groups": 32,
  "name": "phi-1_5",
  "norm_class_name": "LayerNorm",
  "norm_eps": 1e-05,
  "num_experts": 4,
  "padded_vocab_size": 51200,
  "padding_multiple": 512,
  "parallel_residual": true,
  "rope_base": 10000,
  "rope_condense_ratio": 1,
  "rope_n_elem": 32,
  "rotary_percentage": 0.5,
  "scale_embeddings": false,
  "shared_attention_norm": true,
  "top_k": 1,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "use_smoe": false,
  "vocab_size": 50257
}
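
The `auto_map` entries indicate this config is loaded through the Hugging Face auto classes, resolving `ConfigSMOE` and `GPT` from a local `adapter_v2.py` module shipped alongside the config. A minimal loading sketch, assuming the checkpoint directory contains this `config.json`, the `adapter_v2.py` module, and the model weights (the directory path is hypothetical):

```python
# Minimal sketch (not from the source): load a checkpoint that uses this config.
from transformers import AutoConfig, AutoModelForCausalLM

checkpoint_dir = "path/to/checkpoint"  # hypothetical local directory

# trust_remote_code=True lets "auto_map" resolve adapter_v2.ConfigSMOE
# and adapter_v2.GPT from the checkpoint directory.
config = AutoConfig.from_pretrained(checkpoint_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(checkpoint_dir, trust_remote_code=True)

print(type(config).__name__)                         # ConfigSMOE
print(config.n_layer, config.n_head, config.n_embd)  # 24 32 2048
```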