{
  "_name_or_path": "checkpoints/mtgv/MobileLLaMA-1.4B-Chat",
  "anyprec": {
    "arch_config": {
      "layers_name": "layers",
      "model_name": "model",
      "module_names": [
        "self_attn.q_proj",
        "self_attn.k_proj",
        "self_attn.v_proj",
        "self_attn.o_proj",
        "mlp.gate_proj",
        "mlp.up_proj",
        "mlp.down_proj"
      ]
    },
    "group_count": 1,
    "parent_precision": 4,
    "seed_precision": 2,
    "sparse_numvals": {
      "model.layers.0.mlp.down_proj": 103227,
      "model.layers.0.mlp.gate_proj": 104099,
      "model.layers.0.mlp.up_proj": 94464,
      "model.layers.0.self_attn.k_proj": 123232,
      "model.layers.0.self_attn.o_proj": 40694,
      "model.layers.0.self_attn.q_proj": 107706,
      "model.layers.0.self_attn.v_proj": 45328,
      "model.layers.1.mlp.down_proj": 99550,
      "model.layers.1.mlp.gate_proj": 105114,
      "model.layers.1.mlp.up_proj": 94325,
      "model.layers.1.self_attn.k_proj": 282518,
      "model.layers.1.self_attn.o_proj": 79591,
      "model.layers.1.self_attn.q_proj": 274280,
      "model.layers.1.self_attn.v_proj": 54163,
      "model.layers.10.mlp.down_proj": 98714,
      "model.layers.10.mlp.gate_proj": 119347,
      "model.layers.10.mlp.up_proj": 102192,
      "model.layers.10.self_attn.k_proj": 135560,
      "model.layers.10.self_attn.o_proj": 49120,
      "model.layers.10.self_attn.q_proj": 122608,
      "model.layers.10.self_attn.v_proj": 51203,
      "model.layers.11.mlp.down_proj": 99624,
      "model.layers.11.mlp.gate_proj": 121942,
      "model.layers.11.mlp.up_proj": 103267,
      "model.layers.11.self_attn.k_proj": 127872,
      "model.layers.11.self_attn.o_proj": 45314,
      "model.layers.11.self_attn.q_proj": 125543,
      "model.layers.11.self_attn.v_proj": 49611,
      "model.layers.12.mlp.down_proj": 103935,
      "model.layers.12.mlp.gate_proj": 134318,
      "model.layers.12.mlp.up_proj": 109313,
      "model.layers.12.self_attn.k_proj": 115429,
      "model.layers.12.self_attn.o_proj": 41620,
      "model.layers.12.self_attn.q_proj": 109014,
      "model.layers.12.self_attn.v_proj": 48602,
      "model.layers.13.mlp.down_proj": 108353,
      "model.layers.13.mlp.gate_proj": 158387,
      "model.layers.13.mlp.up_proj": 112940,
      "model.layers.13.self_attn.k_proj": 114487,
      "model.layers.13.self_attn.o_proj": 45428,
      "model.layers.13.self_attn.q_proj": 113406,
      "model.layers.13.self_attn.v_proj": 57153,
      "model.layers.14.mlp.down_proj": 113863,
      "model.layers.14.mlp.gate_proj": 168356,
      "model.layers.14.mlp.up_proj": 118034,
      "model.layers.14.self_attn.k_proj": 120529,
      "model.layers.14.self_attn.o_proj": 45075,
      "model.layers.14.self_attn.q_proj": 112276,
      "model.layers.14.self_attn.v_proj": 54165,
      "model.layers.15.mlp.down_proj": 111026,
      "model.layers.15.mlp.gate_proj": 162430,
      "model.layers.15.mlp.up_proj": 119102,
      "model.layers.15.self_attn.k_proj": 123737,
      "model.layers.15.self_attn.o_proj": 47443,
      "model.layers.15.self_attn.q_proj": 125065,
      "model.layers.15.self_attn.v_proj": 56583,
      "model.layers.16.mlp.down_proj": 110444,
      "model.layers.16.mlp.gate_proj": 154396,
      "model.layers.16.mlp.up_proj": 118334,
      "model.layers.16.self_attn.k_proj": 108657,
      "model.layers.16.self_attn.o_proj": 47679,
      "model.layers.16.self_attn.q_proj": 107248,
      "model.layers.16.self_attn.v_proj": 54589,
      "model.layers.17.mlp.down_proj": 108084,
      "model.layers.17.mlp.gate_proj": 143872,
      "model.layers.17.mlp.up_proj": 116322,
      "model.layers.17.self_attn.k_proj": 117888,
      "model.layers.17.self_attn.o_proj": 48108,
      "model.layers.17.self_attn.q_proj": 120507,
      "model.layers.17.self_attn.v_proj": 52799,
      "model.layers.18.mlp.down_proj": 103807,
      "model.layers.18.mlp.gate_proj": 131358,
      "model.layers.18.mlp.up_proj": 113634,
      "model.layers.18.self_attn.k_proj": 105856,
      "model.layers.18.self_attn.o_proj": 49346,
      "model.layers.18.self_attn.q_proj": 122501,
      "model.layers.18.self_attn.v_proj": 51576,
      "model.layers.19.mlp.down_proj": 102430,
      "model.layers.19.mlp.gate_proj": 123863,
      "model.layers.19.mlp.up_proj": 110315,
      "model.layers.19.self_attn.k_proj": 104299,
      "model.layers.19.self_attn.o_proj": 61142,
      "model.layers.19.self_attn.q_proj": 105187,
      "model.layers.19.self_attn.v_proj": 62397,
      "model.layers.2.mlp.down_proj": 95852,
      "model.layers.2.mlp.gate_proj": 95205,
      "model.layers.2.mlp.up_proj": 92548,
      "model.layers.2.self_attn.k_proj": 189410,
      "model.layers.2.self_attn.o_proj": 49748,
      "model.layers.2.self_attn.q_proj": 149970,
      "model.layers.2.self_attn.v_proj": 44302,
      "model.layers.20.mlp.down_proj": 101061,
      "model.layers.20.mlp.gate_proj": 118439,
      "model.layers.20.mlp.up_proj": 109198,
      "model.layers.20.self_attn.k_proj": 87935,
      "model.layers.20.self_attn.o_proj": 54376,
      "model.layers.20.self_attn.q_proj": 89672,
      "model.layers.20.self_attn.v_proj": 50476,
      "model.layers.21.mlp.down_proj": 100632,
      "model.layers.21.mlp.gate_proj": 109938,
      "model.layers.21.mlp.up_proj": 103567,
      "model.layers.21.self_attn.k_proj": 91720,
      "model.layers.21.self_attn.o_proj": 74802,
      "model.layers.21.self_attn.q_proj": 88150,
      "model.layers.21.self_attn.v_proj": 69573,
      "model.layers.22.mlp.down_proj": 111233,
      "model.layers.22.mlp.gate_proj": 121677,
      "model.layers.22.mlp.up_proj": 108841,
      "model.layers.22.self_attn.k_proj": 100375,
      "model.layers.22.self_attn.o_proj": 61122,
      "model.layers.22.self_attn.q_proj": 101996,
      "model.layers.22.self_attn.v_proj": 59543,
      "model.layers.23.mlp.down_proj": 165882,
      "model.layers.23.mlp.gate_proj": 149269,
      "model.layers.23.mlp.up_proj": 156710,
      "model.layers.23.self_attn.k_proj": 74217,
      "model.layers.23.self_attn.o_proj": 70134,
      "model.layers.23.self_attn.q_proj": 71364,
      "model.layers.23.self_attn.v_proj": 66215,
      "model.layers.3.mlp.down_proj": 94469,
      "model.layers.3.mlp.gate_proj": 93720,
      "model.layers.3.mlp.up_proj": 93335,
      "model.layers.3.self_attn.k_proj": 129559,
      "model.layers.3.self_attn.o_proj": 39091,
      "model.layers.3.self_attn.q_proj": 100716,
      "model.layers.3.self_attn.v_proj": 39453,
      "model.layers.4.mlp.down_proj": 97785,
      "model.layers.4.mlp.gate_proj": 97248,
      "model.layers.4.mlp.up_proj": 95089,
      "model.layers.4.self_attn.k_proj": 128341,
      "model.layers.4.self_attn.o_proj": 37298,
      "model.layers.4.self_attn.q_proj": 92904,
      "model.layers.4.self_attn.v_proj": 38505,
      "model.layers.5.mlp.down_proj": 96379,
      "model.layers.5.mlp.gate_proj": 95376,
      "model.layers.5.mlp.up_proj": 94082,
      "model.layers.5.self_attn.k_proj": 122556,
      "model.layers.5.self_attn.o_proj": 43788,
      "model.layers.5.self_attn.q_proj": 102967,
      "model.layers.5.self_attn.v_proj": 47872,
      "model.layers.6.mlp.down_proj": 94813,
      "model.layers.6.mlp.gate_proj": 99275,
      "model.layers.6.mlp.up_proj": 94511,
      "model.layers.6.self_attn.k_proj": 123676,
      "model.layers.6.self_attn.o_proj": 49440,
      "model.layers.6.self_attn.q_proj": 103919,
      "model.layers.6.self_attn.v_proj": 57010,
      "model.layers.7.mlp.down_proj": 94883,
      "model.layers.7.mlp.gate_proj": 102457,
      "model.layers.7.mlp.up_proj": 97437,
      "model.layers.7.self_attn.k_proj": 100675,
      "model.layers.7.self_attn.o_proj": 42193,
      "model.layers.7.self_attn.q_proj": 83228,
      "model.layers.7.self_attn.v_proj": 45513,
      "model.layers.8.mlp.down_proj": 97843,
      "model.layers.8.mlp.gate_proj": 113784,
      "model.layers.8.mlp.up_proj": 103097,
      "model.layers.8.self_attn.k_proj": 113650,
      "model.layers.8.self_attn.o_proj": 52513,
      "model.layers.8.self_attn.q_proj": 92988,
      "model.layers.8.self_attn.v_proj": 60755,
      "model.layers.9.mlp.down_proj": 96823,
      "model.layers.9.mlp.gate_proj": 112042,
      "model.layers.9.mlp.up_proj": 102440,
      "model.layers.9.self_attn.k_proj": 128169,
      "model.layers.9.self_attn.o_proj": 54639,
      "model.layers.9.self_attn.q_proj": 116292,
      "model.layers.9.self_attn.v_proj": 60561
    }
  },
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 32000
}