{
  "alpha": 0,
  "architectures": [
    "GPT"
  ],
  "auto_map": {
    "AutoConfig": "adapter_v2.ConfigSMOE",
    "AutoModelForCausalLM": "adapter_v2.GPT"
  },
  "bias": true,
  "block_size": 2048,
  "gelu_approximate": "tanh",
  "head_size": 64,
  "hf_config": {
    "name": "phi-1_5",
    "org": "microsoft"
  },
  "intermediate_size": 8192,
  "lm_head_bias": true,
  "mlp_class_name": "GptNeoxMLP",
  "model_type": "gpt",
  "n_embd": 2048,
  "n_expert": 0,
  "n_expert_per_token": 0,
  "n_head": 32,
  "n_layer": 24,
  "n_query_groups": 32,
  "name": "phi-1_5",
  "norm_class_name": "LayerNorm",
  "norm_eps": 1e-05,
  "num_experts": 4,
  "padded_vocab_size": 51200,
  "padding_multiple": 512,
  "parallel_residual": true,
  "rope_base": 10000,
  "rope_condense_ratio": 1,
  "rope_n_elem": 32,
  "rotary_percentage": 0.5,
  "scale_embeddings": false,
  "shared_attention_norm": true,
  "top_k": 1,
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "use_smoe": false,
  "vocab_size": 50257
}