{ "_name_or_path": "checkpoints/microsoft/phi-1_5", "anyprec": { "arch_config": { "layers_name": "layers", "model_name": "model", "module_names": [ "self_attn.q_proj", "self_attn.k_proj", "self_attn.v_proj", "self_attn.dense", "mlp.fc1", "mlp.fc2" ] }, "group_count": 1, "parent_precision": 4, "seed_precision": 2, "sparse_numvals": { "model.layers.0.mlp.fc1": 675311, "model.layers.0.mlp.fc2": 624542, "model.layers.0.self_attn.dense": 82454, "model.layers.0.self_attn.k_proj": 180985, "model.layers.0.self_attn.q_proj": 156889, "model.layers.0.self_attn.v_proj": 94332, "model.layers.1.mlp.fc1": 201243, "model.layers.1.mlp.fc2": 325345, "model.layers.1.self_attn.dense": 73540, "model.layers.1.self_attn.k_proj": 103462, "model.layers.1.self_attn.q_proj": 99058, "model.layers.1.self_attn.v_proj": 86333, "model.layers.10.mlp.fc1": 295445, "model.layers.10.mlp.fc2": 323451, "model.layers.10.self_attn.dense": 72926, "model.layers.10.self_attn.k_proj": 100535, "model.layers.10.self_attn.q_proj": 94643, "model.layers.10.self_attn.v_proj": 85469, "model.layers.11.mlp.fc1": 291450, "model.layers.11.mlp.fc2": 321914, "model.layers.11.self_attn.dense": 71540, "model.layers.11.self_attn.k_proj": 97013, "model.layers.11.self_attn.q_proj": 89427, "model.layers.11.self_attn.v_proj": 82468, "model.layers.12.mlp.fc1": 285509, "model.layers.12.mlp.fc2": 328599, "model.layers.12.self_attn.dense": 69830, "model.layers.12.self_attn.k_proj": 101851, "model.layers.12.self_attn.q_proj": 94202, "model.layers.12.self_attn.v_proj": 84071, "model.layers.13.mlp.fc1": 277413, "model.layers.13.mlp.fc2": 308466, "model.layers.13.self_attn.dense": 74257, "model.layers.13.self_attn.k_proj": 101329, "model.layers.13.self_attn.q_proj": 94394, "model.layers.13.self_attn.v_proj": 83090, "model.layers.14.mlp.fc1": 272080, "model.layers.14.mlp.fc2": 347434, "model.layers.14.self_attn.dense": 77486, "model.layers.14.self_attn.k_proj": 99568, "model.layers.14.self_attn.q_proj": 97367, "model.layers.14.self_attn.v_proj": 85949, "model.layers.15.mlp.fc1": 262687, "model.layers.15.mlp.fc2": 326298, "model.layers.15.self_attn.dense": 71091, "model.layers.15.self_attn.k_proj": 99666, "model.layers.15.self_attn.q_proj": 114009, "model.layers.15.self_attn.v_proj": 79472, "model.layers.16.mlp.fc1": 255098, "model.layers.16.mlp.fc2": 365511, "model.layers.16.self_attn.dense": 69991, "model.layers.16.self_attn.k_proj": 97043, "model.layers.16.self_attn.q_proj": 101120, "model.layers.16.self_attn.v_proj": 75770, "model.layers.17.mlp.fc1": 245724, "model.layers.17.mlp.fc2": 337114, "model.layers.17.self_attn.dense": 70591, "model.layers.17.self_attn.k_proj": 92819, "model.layers.17.self_attn.q_proj": 91892, "model.layers.17.self_attn.v_proj": 74253, "model.layers.18.mlp.fc1": 240816, "model.layers.18.mlp.fc2": 335322, "model.layers.18.self_attn.dense": 79137, "model.layers.18.self_attn.k_proj": 98409, "model.layers.18.self_attn.q_proj": 123879, "model.layers.18.self_attn.v_proj": 85293, "model.layers.19.mlp.fc1": 234256, "model.layers.19.mlp.fc2": 317669, "model.layers.19.self_attn.dense": 80277, "model.layers.19.self_attn.k_proj": 97895, "model.layers.19.self_attn.q_proj": 121723, "model.layers.19.self_attn.v_proj": 82971, "model.layers.2.mlp.fc1": 233518, "model.layers.2.mlp.fc2": 315355, "model.layers.2.self_attn.dense": 68242, "model.layers.2.self_attn.k_proj": 102966, "model.layers.2.self_attn.q_proj": 98281, "model.layers.2.self_attn.v_proj": 83855, "model.layers.20.mlp.fc1": 230157, "model.layers.20.mlp.fc2": 317412, "model.layers.20.self_attn.dense": 70557, "model.layers.20.self_attn.k_proj": 96874, "model.layers.20.self_attn.q_proj": 117460, "model.layers.20.self_attn.v_proj": 75849, "model.layers.21.mlp.fc1": 227363, "model.layers.21.mlp.fc2": 323600, "model.layers.21.self_attn.dense": 73035, "model.layers.21.self_attn.k_proj": 93176, "model.layers.21.self_attn.q_proj": 124248, "model.layers.21.self_attn.v_proj": 75505, "model.layers.22.mlp.fc1": 233020, "model.layers.22.mlp.fc2": 395456, "model.layers.22.self_attn.dense": 71502, "model.layers.22.self_attn.k_proj": 88462, "model.layers.22.self_attn.q_proj": 162865, "model.layers.22.self_attn.v_proj": 73909, "model.layers.23.mlp.fc1": 285355, "model.layers.23.mlp.fc2": 631745, "model.layers.23.self_attn.dense": 101963, "model.layers.23.self_attn.k_proj": 107304, "model.layers.23.self_attn.q_proj": 260586, "model.layers.23.self_attn.v_proj": 107005, "model.layers.3.mlp.fc1": 269841, "model.layers.3.mlp.fc2": 330081, "model.layers.3.self_attn.dense": 76738, "model.layers.3.self_attn.k_proj": 114282, "model.layers.3.self_attn.q_proj": 110068, "model.layers.3.self_attn.v_proj": 96670, "model.layers.4.mlp.fc1": 305604, "model.layers.4.mlp.fc2": 333478, "model.layers.4.self_attn.dense": 73404, "model.layers.4.self_attn.k_proj": 105649, "model.layers.4.self_attn.q_proj": 102666, "model.layers.4.self_attn.v_proj": 92391, "model.layers.5.mlp.fc1": 293406, "model.layers.5.mlp.fc2": 337582, "model.layers.5.self_attn.dense": 71678, "model.layers.5.self_attn.k_proj": 120017, "model.layers.5.self_attn.q_proj": 121205, "model.layers.5.self_attn.v_proj": 92099, "model.layers.6.mlp.fc1": 291972, "model.layers.6.mlp.fc2": 329924, "model.layers.6.self_attn.dense": 81259, "model.layers.6.self_attn.k_proj": 104051, "model.layers.6.self_attn.q_proj": 100833, "model.layers.6.self_attn.v_proj": 93397, "model.layers.7.mlp.fc1": 293548, "model.layers.7.mlp.fc2": 331966, "model.layers.7.self_attn.dense": 68519, "model.layers.7.self_attn.k_proj": 108909, "model.layers.7.self_attn.q_proj": 103642, "model.layers.7.self_attn.v_proj": 84278, "model.layers.8.mlp.fc1": 304480, "model.layers.8.mlp.fc2": 318568, "model.layers.8.self_attn.dense": 76294, "model.layers.8.self_attn.k_proj": 110748, "model.layers.8.self_attn.q_proj": 103303, "model.layers.8.self_attn.v_proj": 91497, "model.layers.9.mlp.fc1": 298086, "model.layers.9.mlp.fc2": 319091, "model.layers.9.self_attn.dense": 68561, "model.layers.9.self_attn.k_proj": 109187, "model.layers.9.self_attn.q_proj": 103326, "model.layers.9.self_attn.v_proj": 83167 } }, "architectures": [ "PhiForCausalLM" ], "attention_dropout": 0.0, "bos_token_id": null, "embd_pdrop": 0.0, "eos_token_id": null, "hidden_act": "gelu_new", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 8192, "layer_norm_eps": 1e-05, "max_position_embeddings": 2048, "model_type": "phi", "num_attention_heads": 32, "num_hidden_layers": 24, "num_key_value_heads": 32, "partial_rotary_factor": 0.5, "qk_layernorm": false, "resid_pdrop": 0.0, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.39.3", "use_cache": true, "vocab_size": 51200 }