{ "architecture": "LlamaForCausalLM", "dtype": "float16", "logits_dtype": "float32", "num_hidden_layers": 32, "num_attention_heads": 32, "hidden_size": 4096, "intermediate_size": 14336, "num_key_value_heads": 8, "vocab_size": 128256, "position_embedding_type": "rope_gpt_neox", "max_position_embeddings": 8192, "hidden_act": "silu", "rotary_base": 10000.0, "rotary_scaling": null, "norm_epsilon": 1e-05, "quantization": { "quant_algo": null, "kv_cache_quant_algo": null, "sq_use_plugin": false, "exclude_modules": [ "lm_head" ] }, "mapping": { "world_size": 1, "tp_size": 1, "pp_size": 1 }, "use_parallel_embedding": false, "embedding_sharding_dim": 0, "share_embedding_table": false, "use_prompt_tuning": false, "moe_num_experts": 0, "moe_top_k": 0, "moe_tp_mode": 2, "moe_normalization_mode": 1, "enable_pos_shift": false, "dense_context_fmha": false, "max_lora_rank": 64, "lora_target_modules": null, "hf_modules_to_trtllm_modules": { "q_proj": "attn_q", "k_proj": "attn_k", "v_proj": "attn_v", "o_proj": "attn_dense", "gate_proj": "mlp_h_to_4h", "down_proj": "mlp_4h_to_h", "up_proj": "mlp_gate" }, "trtllm_modules_to_hf_modules": { "attn_q": "q_proj", "attn_k": "k_proj", "attn_v": "v_proj", "attn_dense": "o_proj", "mlp_h_to_4h": "gate_proj", "mlp_4h_to_h": "down_proj", "mlp_gate": "up_proj" }, "disable_weight_only_quant_plugin": false }