{ "alpha": 0, "architectures": [ "GPT" ], "auto_map": { "AutoConfig": "adapter_v2.ConfigSMOE", "AutoModelForCausalLM": "adapter_v2.GPT" }, "bias": true, "block_size": 2048, "gelu_approximate": "tanh", "head_size": 64, "hf_config": { "name": "phi-1_5", "org": "microsoft" }, "intermediate_size": 8192, "lm_head_bias": true, "mlp_class_name": "GptNeoxMLP", "model_type": "gpt", "n_embd": 2048, "n_expert": 0, "n_expert_per_token": 0, "n_head": 32, "n_layer": 24, "n_query_groups": 32, "name": "phi-1_5", "norm_class_name": "LayerNorm", "norm_eps": 1e-05, "num_experts": 4, "padded_vocab_size": 51200, "padding_multiple": 512, "parallel_residual": true, "rope_base": 10000, "rope_condense_ratio": 1, "rope_n_elem": 32, "rotary_percentage": 0.5, "scale_embeddings": false, "shared_attention_norm": true, "top_k": 1, "torch_dtype": "float32", "transformers_version": "4.41.2", "use_smoe": false, "vocab_size": 50257 }