{ "architecture": "MistralForCausalLM", "dtype": "float16", "logits_dtype": "float32", "vocab_size": 32000, "max_position_embeddings": 32768, "hidden_size": 4096, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 8, "head_size": 128, "hidden_act": "silu", "intermediate_size": 14336, "norm_epsilon": 1e-05, "position_embedding_type": "rope_gpt_neox", "use_parallel_embedding": false, "embedding_sharding_dim": 0, "share_embedding_table": false, "mapping": { "world_size": 2, "tp_size": 2, "pp_size": 1 }, "quantization": { "quant_algo": null, "kv_cache_quant_algo": null, "group_size": 128, "smoothquant_val": null, "has_zero_point": false, "pre_quant_scale": false, "exclude_modules": [ "lm_head" ] }, "kv_dtype": "float16", "rotary_scaling": null, "moe_normalization_mode": null, "rotary_base": 1000000.0, "moe_num_experts": 0, "moe_top_k": 0, "moe_tp_mode": 2, "attn_bias": false, "disable_weight_only_quant_plugin": false, "mlp_bias": false }