{ "producer": { "name": "ammo", "version": "0.7.3" }, "architecture": "LlamaForCausalLM", "dtype": "float16", "num_hidden_layers": 48, "num_attention_heads": 32, "num_key_value_heads": 8, "hidden_size": 4096, "norm_epsilon": 1e-05, "vocab_size": 32000, "max_position_embeddings": 4096, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "quantization": { "quant_algo": null, "kv_cache_quant_algo": "FP8" }, "mapping": { "world_size": 1, "tp_size": 1, "pp_size": 1 }, "head_size": 128, "intermediate_size": 14336, "position_embedding_type": "rope_gpt_neox" }