{ "vocab_size": 32000, "max_position_embeddings": 4096, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "initializer_range": 0.02, "rms_norm_eps": 1e-05, "pretraining_tp": 1, "use_cache": true, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "attention_dropout": 0.0, "mlp_bias": false, "torch_dtype": null, "tie_word_embeddings": false, "architectures": [], "bos_token_id": 1, "eos_token_id": 2, "_name_or_path": "/mnt/6e3c126c-c6bb-43eb-9d82-1e59b2111688/ecrncevi/Llama-2-7b-hf", "transformers_version": "4.44.2", "model_type": "llama", "quantization_config": { "quant_method": "spqr", "beta1": 16, "beta2": 16, "bits": 3, "modules_to_not_convert": [ "model.embed_tokens.weight", "model.layers.0.input_layernorm.weight", "model.layers.0.post_attention_layernorm.weight", "model.layers.1.input_layernorm.weight", "model.layers.1.post_attention_layernorm.weight", "model.layers.2.input_layernorm.weight", "model.layers.2.post_attention_layernorm.weight", "model.layers.3.input_layernorm.weight", "model.layers.3.post_attention_layernorm.weight", "model.layers.4.input_layernorm.weight", "model.layers.4.post_attention_layernorm.weight", "model.layers.5.input_layernorm.weight", "model.layers.5.post_attention_layernorm.weight", "model.layers.6.input_layernorm.weight", "model.layers.6.post_attention_layernorm.weight", "model.layers.7.input_layernorm.weight", "model.layers.7.post_attention_layernorm.weight", "model.layers.8.input_layernorm.weight", "model.layers.8.post_attention_layernorm.weight", "model.layers.9.input_layernorm.weight", "model.layers.9.post_attention_layernorm.weight", "model.layers.10.input_layernorm.weight", "model.layers.10.post_attention_layernorm.weight", "model.layers.11.input_layernorm.weight", "model.layers.11.post_attention_layernorm.weight", "model.layers.12.input_layernorm.weight", "model.layers.12.post_attention_layernorm.weight", "model.layers.13.input_layernorm.weight", "model.layers.13.post_attention_layernorm.weight", "model.layers.14.input_layernorm.weight", "model.layers.14.post_attention_layernorm.weight", "model.layers.15.input_layernorm.weight", "model.layers.15.post_attention_layernorm.weight", "model.layers.16.input_layernorm.weight", "model.layers.16.post_attention_layernorm.weight", "model.layers.17.input_layernorm.weight", "model.layers.17.post_attention_layernorm.weight", "model.layers.18.input_layernorm.weight", "model.layers.18.post_attention_layernorm.weight", "model.layers.19.input_layernorm.weight", "model.layers.19.post_attention_layernorm.weight", "model.layers.20.input_layernorm.weight", "model.layers.20.post_attention_layernorm.weight", "model.layers.21.input_layernorm.weight", "model.layers.21.post_attention_layernorm.weight", "model.layers.22.input_layernorm.weight", "model.layers.22.post_attention_layernorm.weight", "model.layers.23.input_layernorm.weight", "model.layers.23.post_attention_layernorm.weight", "model.layers.24.input_layernorm.weight", "model.layers.24.post_attention_layernorm.weight", "model.layers.25.input_layernorm.weight", "model.layers.25.post_attention_layernorm.weight", "model.layers.26.input_layernorm.weight", "model.layers.26.post_attention_layernorm.weight", "model.layers.27.input_layernorm.weight", "model.layers.27.post_attention_layernorm.weight", "model.layers.28.input_layernorm.weight", "model.layers.28.post_attention_layernorm.weight", "model.layers.29.input_layernorm.weight", "model.layers.29.post_attention_layernorm.weight", "model.layers.30.input_layernorm.weight", "model.layers.30.post_attention_layernorm.weight", "model.layers.31.input_layernorm.weight", "model.layers.31.post_attention_layernorm.weight", "model.norm.weight", "lm_head.weight" ], "shapes": { "model.layers.0.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.0.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.0.self_attn.q_proj.col_vals.shape": 44646, "model.layers.0.self_attn.q_proj.in_perm.shape": 2048, "model.layers.0.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.0.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.0.self_attn.k_proj.col_vals.shape": 54665, "model.layers.0.self_attn.k_proj.in_perm.shape": 2048, "model.layers.0.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.0.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.0.self_attn.v_proj.col_vals.shape": 344580, "model.layers.0.self_attn.v_proj.in_perm.shape": 2048, "model.layers.0.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.0.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.0.self_attn.o_proj.col_vals.shape": 326823, "model.layers.0.self_attn.o_proj.in_perm.shape": 2048, "model.layers.0.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.0.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.0.mlp.gate_proj.col_vals.shape": 767703, "model.layers.0.mlp.gate_proj.in_perm.shape": 2048, "model.layers.0.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.0.mlp.up_proj.row_offsets.shape": 11009, "model.layers.0.mlp.up_proj.col_vals.shape": 781135, "model.layers.0.mlp.up_proj.in_perm.shape": 2048, "model.layers.0.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.0.mlp.down_proj.row_offsets.shape": 4097, "model.layers.0.mlp.down_proj.col_vals.shape": 807092, "model.layers.0.mlp.down_proj.in_perm.shape": 5504, "model.layers.1.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.1.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.1.self_attn.q_proj.col_vals.shape": 247251, "model.layers.1.self_attn.q_proj.in_perm.shape": 2048, "model.layers.1.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.1.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.1.self_attn.k_proj.col_vals.shape": 239754, "model.layers.1.self_attn.k_proj.in_perm.shape": 2048, "model.layers.1.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.1.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.1.self_attn.v_proj.col_vals.shape": 360082, "model.layers.1.self_attn.v_proj.in_perm.shape": 2048, "model.layers.1.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.1.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.1.self_attn.o_proj.col_vals.shape": 312684, "model.layers.1.self_attn.o_proj.in_perm.shape": 2048, "model.layers.1.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.1.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.1.mlp.gate_proj.col_vals.shape": 766401, "model.layers.1.mlp.gate_proj.in_perm.shape": 2048, "model.layers.1.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.1.mlp.up_proj.row_offsets.shape": 11009, "model.layers.1.mlp.up_proj.col_vals.shape": 770241, "model.layers.1.mlp.up_proj.in_perm.shape": 2048, "model.layers.1.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.1.mlp.down_proj.row_offsets.shape": 4097, "model.layers.1.mlp.down_proj.col_vals.shape": 65901, "model.layers.1.mlp.down_proj.in_perm.shape": 5504, "model.layers.2.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.2.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.2.self_attn.q_proj.col_vals.shape": 277378, "model.layers.2.self_attn.q_proj.in_perm.shape": 2048, "model.layers.2.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.2.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.2.self_attn.k_proj.col_vals.shape": 275528, "model.layers.2.self_attn.k_proj.in_perm.shape": 2048, "model.layers.2.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.2.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.2.self_attn.v_proj.col_vals.shape": 316985, "model.layers.2.self_attn.v_proj.in_perm.shape": 2048, "model.layers.2.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.2.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.2.self_attn.o_proj.col_vals.shape": 275900, "model.layers.2.self_attn.o_proj.in_perm.shape": 2048, "model.layers.2.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.2.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.2.mlp.gate_proj.col_vals.shape": 766027, "model.layers.2.mlp.gate_proj.in_perm.shape": 2048, "model.layers.2.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.2.mlp.up_proj.row_offsets.shape": 11009, "model.layers.2.mlp.up_proj.col_vals.shape": 764280, "model.layers.2.mlp.up_proj.in_perm.shape": 2048, "model.layers.2.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.2.mlp.down_proj.row_offsets.shape": 4097, "model.layers.2.mlp.down_proj.col_vals.shape": 776893, "model.layers.2.mlp.down_proj.in_perm.shape": 5504, "model.layers.3.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.3.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.3.self_attn.q_proj.col_vals.shape": 290549, "model.layers.3.self_attn.q_proj.in_perm.shape": 2048, "model.layers.3.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.3.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.3.self_attn.k_proj.col_vals.shape": 286015, "model.layers.3.self_attn.k_proj.in_perm.shape": 2048, "model.layers.3.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.3.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.3.self_attn.v_proj.col_vals.shape": 315620, "model.layers.3.self_attn.v_proj.in_perm.shape": 2048, "model.layers.3.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.3.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.3.self_attn.o_proj.col_vals.shape": 262559, "model.layers.3.self_attn.o_proj.in_perm.shape": 2048, "model.layers.3.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.3.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.3.mlp.gate_proj.col_vals.shape": 761413, "model.layers.3.mlp.gate_proj.in_perm.shape": 2048, "model.layers.3.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.3.mlp.up_proj.row_offsets.shape": 11009, "model.layers.3.mlp.up_proj.col_vals.shape": 765370, "model.layers.3.mlp.up_proj.in_perm.shape": 2048, "model.layers.3.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.3.mlp.down_proj.row_offsets.shape": 4097, "model.layers.3.mlp.down_proj.col_vals.shape": 790568, "model.layers.3.mlp.down_proj.in_perm.shape": 5504, "model.layers.4.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.4.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.4.self_attn.q_proj.col_vals.shape": 279570, "model.layers.4.self_attn.q_proj.in_perm.shape": 2048, "model.layers.4.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.4.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.4.self_attn.k_proj.col_vals.shape": 270454, "model.layers.4.self_attn.k_proj.in_perm.shape": 2048, "model.layers.4.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.4.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.4.self_attn.v_proj.col_vals.shape": 314846, "model.layers.4.self_attn.v_proj.in_perm.shape": 2048, "model.layers.4.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.4.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.4.self_attn.o_proj.col_vals.shape": 278091, "model.layers.4.self_attn.o_proj.in_perm.shape": 2048, "model.layers.4.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.4.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.4.mlp.gate_proj.col_vals.shape": 761084, "model.layers.4.mlp.gate_proj.in_perm.shape": 2048, "model.layers.4.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.4.mlp.up_proj.row_offsets.shape": 11009, "model.layers.4.mlp.up_proj.col_vals.shape": 771623, "model.layers.4.mlp.up_proj.in_perm.shape": 2048, "model.layers.4.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.4.mlp.down_proj.row_offsets.shape": 4097, "model.layers.4.mlp.down_proj.col_vals.shape": 789374, "model.layers.4.mlp.down_proj.in_perm.shape": 5504, "model.layers.5.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.5.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.5.self_attn.q_proj.col_vals.shape": 280057, "model.layers.5.self_attn.q_proj.in_perm.shape": 2048, "model.layers.5.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.5.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.5.self_attn.k_proj.col_vals.shape": 268852, "model.layers.5.self_attn.k_proj.in_perm.shape": 2048, "model.layers.5.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.5.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.5.self_attn.v_proj.col_vals.shape": 318024, "model.layers.5.self_attn.v_proj.in_perm.shape": 2048, "model.layers.5.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.5.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.5.self_attn.o_proj.col_vals.shape": 273265, "model.layers.5.self_attn.o_proj.in_perm.shape": 2048, "model.layers.5.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.5.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.5.mlp.gate_proj.col_vals.shape": 759031, "model.layers.5.mlp.gate_proj.in_perm.shape": 2048, "model.layers.5.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.5.mlp.up_proj.row_offsets.shape": 11009, "model.layers.5.mlp.up_proj.col_vals.shape": 770071, "model.layers.5.mlp.up_proj.in_perm.shape": 2048, "model.layers.5.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.5.mlp.down_proj.row_offsets.shape": 4097, "model.layers.5.mlp.down_proj.col_vals.shape": 786653, "model.layers.5.mlp.down_proj.in_perm.shape": 5504, "model.layers.6.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.6.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.6.self_attn.q_proj.col_vals.shape": 284527, "model.layers.6.self_attn.q_proj.in_perm.shape": 2048, "model.layers.6.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.6.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.6.self_attn.k_proj.col_vals.shape": 280859, "model.layers.6.self_attn.k_proj.in_perm.shape": 2048, "model.layers.6.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.6.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.6.self_attn.v_proj.col_vals.shape": 314947, "model.layers.6.self_attn.v_proj.in_perm.shape": 2048, "model.layers.6.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.6.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.6.self_attn.o_proj.col_vals.shape": 275025, "model.layers.6.self_attn.o_proj.in_perm.shape": 2048, "model.layers.6.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.6.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.6.mlp.gate_proj.col_vals.shape": 759168, "model.layers.6.mlp.gate_proj.in_perm.shape": 2048, "model.layers.6.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.6.mlp.up_proj.row_offsets.shape": 11009, "model.layers.6.mlp.up_proj.col_vals.shape": 770914, "model.layers.6.mlp.up_proj.in_perm.shape": 2048, "model.layers.6.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.6.mlp.down_proj.row_offsets.shape": 4097, "model.layers.6.mlp.down_proj.col_vals.shape": 785700, "model.layers.6.mlp.down_proj.in_perm.shape": 5504, "model.layers.7.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.7.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.7.self_attn.q_proj.col_vals.shape": 287757, "model.layers.7.self_attn.q_proj.in_perm.shape": 2048, "model.layers.7.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.7.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.7.self_attn.k_proj.col_vals.shape": 281927, "model.layers.7.self_attn.k_proj.in_perm.shape": 2048, "model.layers.7.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.7.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.7.self_attn.v_proj.col_vals.shape": 314672, "model.layers.7.self_attn.v_proj.in_perm.shape": 2048, "model.layers.7.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.7.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.7.self_attn.o_proj.col_vals.shape": 274752, "model.layers.7.self_attn.o_proj.in_perm.shape": 2048, "model.layers.7.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.7.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.7.mlp.gate_proj.col_vals.shape": 760209, "model.layers.7.mlp.gate_proj.in_perm.shape": 2048, "model.layers.7.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.7.mlp.up_proj.row_offsets.shape": 11009, "model.layers.7.mlp.up_proj.col_vals.shape": 772503, "model.layers.7.mlp.up_proj.in_perm.shape": 2048, "model.layers.7.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.7.mlp.down_proj.row_offsets.shape": 4097, "model.layers.7.mlp.down_proj.col_vals.shape": 783181, "model.layers.7.mlp.down_proj.in_perm.shape": 5504, "model.layers.8.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.8.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.8.self_attn.q_proj.col_vals.shape": 286402, "model.layers.8.self_attn.q_proj.in_perm.shape": 2048, "model.layers.8.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.8.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.8.self_attn.k_proj.col_vals.shape": 274369, "model.layers.8.self_attn.k_proj.in_perm.shape": 2048, "model.layers.8.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.8.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.8.self_attn.v_proj.col_vals.shape": 316146, "model.layers.8.self_attn.v_proj.in_perm.shape": 2048, "model.layers.8.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.8.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.8.self_attn.o_proj.col_vals.shape": 276760, "model.layers.8.self_attn.o_proj.in_perm.shape": 2048, "model.layers.8.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.8.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.8.mlp.gate_proj.col_vals.shape": 757944, "model.layers.8.mlp.gate_proj.in_perm.shape": 2048, "model.layers.8.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.8.mlp.up_proj.row_offsets.shape": 11009, "model.layers.8.mlp.up_proj.col_vals.shape": 774371, "model.layers.8.mlp.up_proj.in_perm.shape": 2048, "model.layers.8.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.8.mlp.down_proj.row_offsets.shape": 4097, "model.layers.8.mlp.down_proj.col_vals.shape": 783639, "model.layers.8.mlp.down_proj.in_perm.shape": 5504, "model.layers.9.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.9.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.9.self_attn.q_proj.col_vals.shape": 287016, "model.layers.9.self_attn.q_proj.in_perm.shape": 2048, "model.layers.9.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.9.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.9.self_attn.k_proj.col_vals.shape": 272500, "model.layers.9.self_attn.k_proj.in_perm.shape": 2048, "model.layers.9.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.9.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.9.self_attn.v_proj.col_vals.shape": 311792, "model.layers.9.self_attn.v_proj.in_perm.shape": 2048, "model.layers.9.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.9.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.9.self_attn.o_proj.col_vals.shape": 277506, "model.layers.9.self_attn.o_proj.in_perm.shape": 2048, "model.layers.9.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.9.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.9.mlp.gate_proj.col_vals.shape": 758413, "model.layers.9.mlp.gate_proj.in_perm.shape": 2048, "model.layers.9.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.9.mlp.up_proj.row_offsets.shape": 11009, "model.layers.9.mlp.up_proj.col_vals.shape": 778305, "model.layers.9.mlp.up_proj.in_perm.shape": 2048, "model.layers.9.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.9.mlp.down_proj.row_offsets.shape": 4097, "model.layers.9.mlp.down_proj.col_vals.shape": 788558, "model.layers.9.mlp.down_proj.in_perm.shape": 5504, "model.layers.10.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.10.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.10.self_attn.q_proj.col_vals.shape": 287968, "model.layers.10.self_attn.q_proj.in_perm.shape": 2048, "model.layers.10.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.10.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.10.self_attn.k_proj.col_vals.shape": 272194, "model.layers.10.self_attn.k_proj.in_perm.shape": 2048, "model.layers.10.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.10.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.10.self_attn.v_proj.col_vals.shape": 308825, "model.layers.10.self_attn.v_proj.in_perm.shape": 2048, "model.layers.10.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.10.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.10.self_attn.o_proj.col_vals.shape": 276573, "model.layers.10.self_attn.o_proj.in_perm.shape": 2048, "model.layers.10.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.10.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.10.mlp.gate_proj.col_vals.shape": 754727, "model.layers.10.mlp.gate_proj.in_perm.shape": 2048, "model.layers.10.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.10.mlp.up_proj.row_offsets.shape": 11009, "model.layers.10.mlp.up_proj.col_vals.shape": 776777, "model.layers.10.mlp.up_proj.in_perm.shape": 2048, "model.layers.10.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.10.mlp.down_proj.row_offsets.shape": 4097, "model.layers.10.mlp.down_proj.col_vals.shape": 795947, "model.layers.10.mlp.down_proj.in_perm.shape": 5504, "model.layers.11.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.11.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.11.self_attn.q_proj.col_vals.shape": 283876, "model.layers.11.self_attn.q_proj.in_perm.shape": 2048, "model.layers.11.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.11.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.11.self_attn.k_proj.col_vals.shape": 267497, "model.layers.11.self_attn.k_proj.in_perm.shape": 2048, "model.layers.11.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.11.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.11.self_attn.v_proj.col_vals.shape": 311473, "model.layers.11.self_attn.v_proj.in_perm.shape": 2048, "model.layers.11.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.11.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.11.self_attn.o_proj.col_vals.shape": 278478, "model.layers.11.self_attn.o_proj.in_perm.shape": 2048, "model.layers.11.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.11.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.11.mlp.gate_proj.col_vals.shape": 755255, "model.layers.11.mlp.gate_proj.in_perm.shape": 2048, "model.layers.11.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.11.mlp.up_proj.row_offsets.shape": 11009, "model.layers.11.mlp.up_proj.col_vals.shape": 774866, "model.layers.11.mlp.up_proj.in_perm.shape": 2048, "model.layers.11.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.11.mlp.down_proj.row_offsets.shape": 4097, "model.layers.11.mlp.down_proj.col_vals.shape": 785771, "model.layers.11.mlp.down_proj.in_perm.shape": 5504, "model.layers.12.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.12.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.12.self_attn.q_proj.col_vals.shape": 286613, "model.layers.12.self_attn.q_proj.in_perm.shape": 2048, "model.layers.12.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.12.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.12.self_attn.k_proj.col_vals.shape": 271141, "model.layers.12.self_attn.k_proj.in_perm.shape": 2048, "model.layers.12.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.12.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.12.self_attn.v_proj.col_vals.shape": 307220, "model.layers.12.self_attn.v_proj.in_perm.shape": 2048, "model.layers.12.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.12.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.12.self_attn.o_proj.col_vals.shape": 278019, "model.layers.12.self_attn.o_proj.in_perm.shape": 2048, "model.layers.12.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.12.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.12.mlp.gate_proj.col_vals.shape": 753547, "model.layers.12.mlp.gate_proj.in_perm.shape": 2048, "model.layers.12.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.12.mlp.up_proj.row_offsets.shape": 11009, "model.layers.12.mlp.up_proj.col_vals.shape": 774577, "model.layers.12.mlp.up_proj.in_perm.shape": 2048, "model.layers.12.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.12.mlp.down_proj.row_offsets.shape": 4097, "model.layers.12.mlp.down_proj.col_vals.shape": 785966, "model.layers.12.mlp.down_proj.in_perm.shape": 5504, "model.layers.13.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.13.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.13.self_attn.q_proj.col_vals.shape": 284102, "model.layers.13.self_attn.q_proj.in_perm.shape": 2048, "model.layers.13.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.13.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.13.self_attn.k_proj.col_vals.shape": 271166, "model.layers.13.self_attn.k_proj.in_perm.shape": 2048, "model.layers.13.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.13.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.13.self_attn.v_proj.col_vals.shape": 307926, "model.layers.13.self_attn.v_proj.in_perm.shape": 2048, "model.layers.13.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.13.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.13.self_attn.o_proj.col_vals.shape": 278409, "model.layers.13.self_attn.o_proj.in_perm.shape": 2048, "model.layers.13.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.13.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.13.mlp.gate_proj.col_vals.shape": 754940, "model.layers.13.mlp.gate_proj.in_perm.shape": 2048, "model.layers.13.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.13.mlp.up_proj.row_offsets.shape": 11009, "model.layers.13.mlp.up_proj.col_vals.shape": 773648, "model.layers.13.mlp.up_proj.in_perm.shape": 2048, "model.layers.13.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.13.mlp.down_proj.row_offsets.shape": 4097, "model.layers.13.mlp.down_proj.col_vals.shape": 795711, "model.layers.13.mlp.down_proj.in_perm.shape": 5504, "model.layers.14.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.14.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.14.self_attn.q_proj.col_vals.shape": 286971, "model.layers.14.self_attn.q_proj.in_perm.shape": 2048, "model.layers.14.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.14.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.14.self_attn.k_proj.col_vals.shape": 267434, "model.layers.14.self_attn.k_proj.in_perm.shape": 2048, "model.layers.14.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.14.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.14.self_attn.v_proj.col_vals.shape": 310037, "model.layers.14.self_attn.v_proj.in_perm.shape": 2048, "model.layers.14.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.14.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.14.self_attn.o_proj.col_vals.shape": 278587, "model.layers.14.self_attn.o_proj.in_perm.shape": 2048, "model.layers.14.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.14.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.14.mlp.gate_proj.col_vals.shape": 755528, "model.layers.14.mlp.gate_proj.in_perm.shape": 2048, "model.layers.14.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.14.mlp.up_proj.row_offsets.shape": 11009, "model.layers.14.mlp.up_proj.col_vals.shape": 772483, "model.layers.14.mlp.up_proj.in_perm.shape": 2048, "model.layers.14.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.14.mlp.down_proj.row_offsets.shape": 4097, "model.layers.14.mlp.down_proj.col_vals.shape": 794709, "model.layers.14.mlp.down_proj.in_perm.shape": 5504, "model.layers.15.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.15.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.15.self_attn.q_proj.col_vals.shape": 286611, "model.layers.15.self_attn.q_proj.in_perm.shape": 2048, "model.layers.15.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.15.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.15.self_attn.k_proj.col_vals.shape": 270554, "model.layers.15.self_attn.k_proj.in_perm.shape": 2048, "model.layers.15.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.15.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.15.self_attn.v_proj.col_vals.shape": 312763, "model.layers.15.self_attn.v_proj.in_perm.shape": 2048, "model.layers.15.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.15.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.15.self_attn.o_proj.col_vals.shape": 278818, "model.layers.15.self_attn.o_proj.in_perm.shape": 2048, "model.layers.15.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.15.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.15.mlp.gate_proj.col_vals.shape": 755604, "model.layers.15.mlp.gate_proj.in_perm.shape": 2048, "model.layers.15.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.15.mlp.up_proj.row_offsets.shape": 11009, "model.layers.15.mlp.up_proj.col_vals.shape": 770147, "model.layers.15.mlp.up_proj.in_perm.shape": 2048, "model.layers.15.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.15.mlp.down_proj.row_offsets.shape": 4097, "model.layers.15.mlp.down_proj.col_vals.shape": 800737, "model.layers.15.mlp.down_proj.in_perm.shape": 5504, "model.layers.16.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.16.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.16.self_attn.q_proj.col_vals.shape": 285161, "model.layers.16.self_attn.q_proj.in_perm.shape": 2048, "model.layers.16.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.16.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.16.self_attn.k_proj.col_vals.shape": 264469, "model.layers.16.self_attn.k_proj.in_perm.shape": 2048, "model.layers.16.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.16.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.16.self_attn.v_proj.col_vals.shape": 310837, "model.layers.16.self_attn.v_proj.in_perm.shape": 2048, "model.layers.16.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.16.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.16.self_attn.o_proj.col_vals.shape": 280326, "model.layers.16.self_attn.o_proj.in_perm.shape": 2048, "model.layers.16.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.16.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.16.mlp.gate_proj.col_vals.shape": 751402, "model.layers.16.mlp.gate_proj.in_perm.shape": 2048, "model.layers.16.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.16.mlp.up_proj.row_offsets.shape": 11009, "model.layers.16.mlp.up_proj.col_vals.shape": 765291, "model.layers.16.mlp.up_proj.in_perm.shape": 2048, "model.layers.16.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.16.mlp.down_proj.row_offsets.shape": 4097, "model.layers.16.mlp.down_proj.col_vals.shape": 796628, "model.layers.16.mlp.down_proj.in_perm.shape": 5504, "model.layers.17.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.17.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.17.self_attn.q_proj.col_vals.shape": 285905, "model.layers.17.self_attn.q_proj.in_perm.shape": 2048, "model.layers.17.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.17.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.17.self_attn.k_proj.col_vals.shape": 270381, "model.layers.17.self_attn.k_proj.in_perm.shape": 2048, "model.layers.17.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.17.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.17.self_attn.v_proj.col_vals.shape": 307352, "model.layers.17.self_attn.v_proj.in_perm.shape": 2048, "model.layers.17.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.17.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.17.self_attn.o_proj.col_vals.shape": 281243, "model.layers.17.self_attn.o_proj.in_perm.shape": 2048, "model.layers.17.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.17.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.17.mlp.gate_proj.col_vals.shape": 750543, "model.layers.17.mlp.gate_proj.in_perm.shape": 2048, "model.layers.17.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.17.mlp.up_proj.row_offsets.shape": 11009, "model.layers.17.mlp.up_proj.col_vals.shape": 761966, "model.layers.17.mlp.up_proj.in_perm.shape": 2048, "model.layers.17.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.17.mlp.down_proj.row_offsets.shape": 4097, "model.layers.17.mlp.down_proj.col_vals.shape": 786876, "model.layers.17.mlp.down_proj.in_perm.shape": 5504, "model.layers.18.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.18.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.18.self_attn.q_proj.col_vals.shape": 287515, "model.layers.18.self_attn.q_proj.in_perm.shape": 2048, "model.layers.18.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.18.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.18.self_attn.k_proj.col_vals.shape": 275355, "model.layers.18.self_attn.k_proj.in_perm.shape": 2048, "model.layers.18.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.18.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.18.self_attn.v_proj.col_vals.shape": 305207, "model.layers.18.self_attn.v_proj.in_perm.shape": 2048, "model.layers.18.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.18.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.18.self_attn.o_proj.col_vals.shape": 280193, "model.layers.18.self_attn.o_proj.in_perm.shape": 2048, "model.layers.18.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.18.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.18.mlp.gate_proj.col_vals.shape": 750879, "model.layers.18.mlp.gate_proj.in_perm.shape": 2048, "model.layers.18.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.18.mlp.up_proj.row_offsets.shape": 11009, "model.layers.18.mlp.up_proj.col_vals.shape": 761380, "model.layers.18.mlp.up_proj.in_perm.shape": 2048, "model.layers.18.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.18.mlp.down_proj.row_offsets.shape": 4097, "model.layers.18.mlp.down_proj.col_vals.shape": 791345, "model.layers.18.mlp.down_proj.in_perm.shape": 5504, "model.layers.19.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.19.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.19.self_attn.q_proj.col_vals.shape": 284178, "model.layers.19.self_attn.q_proj.in_perm.shape": 2048, "model.layers.19.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.19.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.19.self_attn.k_proj.col_vals.shape": 269926, "model.layers.19.self_attn.k_proj.in_perm.shape": 2048, "model.layers.19.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.19.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.19.self_attn.v_proj.col_vals.shape": 303253, "model.layers.19.self_attn.v_proj.in_perm.shape": 2048, "model.layers.19.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.19.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.19.self_attn.o_proj.col_vals.shape": 281276, "model.layers.19.self_attn.o_proj.in_perm.shape": 2048, "model.layers.19.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.19.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.19.mlp.gate_proj.col_vals.shape": 751227, "model.layers.19.mlp.gate_proj.in_perm.shape": 2048, "model.layers.19.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.19.mlp.up_proj.row_offsets.shape": 11009, "model.layers.19.mlp.up_proj.col_vals.shape": 759368, "model.layers.19.mlp.up_proj.in_perm.shape": 2048, "model.layers.19.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.19.mlp.down_proj.row_offsets.shape": 4097, "model.layers.19.mlp.down_proj.col_vals.shape": 785349, "model.layers.19.mlp.down_proj.in_perm.shape": 5504, "model.layers.20.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.20.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.20.self_attn.q_proj.col_vals.shape": 283706, "model.layers.20.self_attn.q_proj.in_perm.shape": 2048, "model.layers.20.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.20.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.20.self_attn.k_proj.col_vals.shape": 272655, "model.layers.20.self_attn.k_proj.in_perm.shape": 2048, "model.layers.20.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.20.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.20.self_attn.v_proj.col_vals.shape": 303626, "model.layers.20.self_attn.v_proj.in_perm.shape": 2048, "model.layers.20.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.20.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.20.self_attn.o_proj.col_vals.shape": 278935, "model.layers.20.self_attn.o_proj.in_perm.shape": 2048, "model.layers.20.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.20.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.20.mlp.gate_proj.col_vals.shape": 751411, "model.layers.20.mlp.gate_proj.in_perm.shape": 2048, "model.layers.20.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.20.mlp.up_proj.row_offsets.shape": 11009, "model.layers.20.mlp.up_proj.col_vals.shape": 758252, "model.layers.20.mlp.up_proj.in_perm.shape": 2048, "model.layers.20.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.20.mlp.down_proj.row_offsets.shape": 4097, "model.layers.20.mlp.down_proj.col_vals.shape": 784248, "model.layers.20.mlp.down_proj.in_perm.shape": 5504, "model.layers.21.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.21.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.21.self_attn.q_proj.col_vals.shape": 284547, "model.layers.21.self_attn.q_proj.in_perm.shape": 2048, "model.layers.21.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.21.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.21.self_attn.k_proj.col_vals.shape": 276845, "model.layers.21.self_attn.k_proj.in_perm.shape": 2048, "model.layers.21.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.21.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.21.self_attn.v_proj.col_vals.shape": 300429, "model.layers.21.self_attn.v_proj.in_perm.shape": 2048, "model.layers.21.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.21.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.21.self_attn.o_proj.col_vals.shape": 278938, "model.layers.21.self_attn.o_proj.in_perm.shape": 2048, "model.layers.21.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.21.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.21.mlp.gate_proj.col_vals.shape": 753249, "model.layers.21.mlp.gate_proj.in_perm.shape": 2048, "model.layers.21.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.21.mlp.up_proj.row_offsets.shape": 11009, "model.layers.21.mlp.up_proj.col_vals.shape": 760378, "model.layers.21.mlp.up_proj.in_perm.shape": 2048, "model.layers.21.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.21.mlp.down_proj.row_offsets.shape": 4097, "model.layers.21.mlp.down_proj.col_vals.shape": 778977, "model.layers.21.mlp.down_proj.in_perm.shape": 5504, "model.layers.22.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.22.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.22.self_attn.q_proj.col_vals.shape": 287172, "model.layers.22.self_attn.q_proj.in_perm.shape": 2048, "model.layers.22.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.22.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.22.self_attn.k_proj.col_vals.shape": 279951, "model.layers.22.self_attn.k_proj.in_perm.shape": 2048, "model.layers.22.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.22.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.22.self_attn.v_proj.col_vals.shape": 300804, "model.layers.22.self_attn.v_proj.in_perm.shape": 2048, "model.layers.22.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.22.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.22.self_attn.o_proj.col_vals.shape": 277006, "model.layers.22.self_attn.o_proj.in_perm.shape": 2048, "model.layers.22.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.22.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.22.mlp.gate_proj.col_vals.shape": 755441, "model.layers.22.mlp.gate_proj.in_perm.shape": 2048, "model.layers.22.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.22.mlp.up_proj.row_offsets.shape": 11009, "model.layers.22.mlp.up_proj.col_vals.shape": 760582, "model.layers.22.mlp.up_proj.in_perm.shape": 2048, "model.layers.22.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.22.mlp.down_proj.row_offsets.shape": 4097, "model.layers.22.mlp.down_proj.col_vals.shape": 777317, "model.layers.22.mlp.down_proj.in_perm.shape": 5504, "model.layers.23.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.23.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.23.self_attn.q_proj.col_vals.shape": 286813, "model.layers.23.self_attn.q_proj.in_perm.shape": 2048, "model.layers.23.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.23.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.23.self_attn.k_proj.col_vals.shape": 281775, "model.layers.23.self_attn.k_proj.in_perm.shape": 2048, "model.layers.23.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.23.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.23.self_attn.v_proj.col_vals.shape": 295725, "model.layers.23.self_attn.v_proj.in_perm.shape": 2048, "model.layers.23.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.23.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.23.self_attn.o_proj.col_vals.shape": 279797, "model.layers.23.self_attn.o_proj.in_perm.shape": 2048, "model.layers.23.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.23.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.23.mlp.gate_proj.col_vals.shape": 753193, "model.layers.23.mlp.gate_proj.in_perm.shape": 2048, "model.layers.23.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.23.mlp.up_proj.row_offsets.shape": 11009, "model.layers.23.mlp.up_proj.col_vals.shape": 758030, "model.layers.23.mlp.up_proj.in_perm.shape": 2048, "model.layers.23.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.23.mlp.down_proj.row_offsets.shape": 4097, "model.layers.23.mlp.down_proj.col_vals.shape": 779986, "model.layers.23.mlp.down_proj.in_perm.shape": 5504, "model.layers.24.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.24.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.24.self_attn.q_proj.col_vals.shape": 285039, "model.layers.24.self_attn.q_proj.in_perm.shape": 2048, "model.layers.24.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.24.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.24.self_attn.k_proj.col_vals.shape": 277202, "model.layers.24.self_attn.k_proj.in_perm.shape": 2048, "model.layers.24.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.24.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.24.self_attn.v_proj.col_vals.shape": 298680, "model.layers.24.self_attn.v_proj.in_perm.shape": 2048, "model.layers.24.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.24.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.24.self_attn.o_proj.col_vals.shape": 279938, "model.layers.24.self_attn.o_proj.in_perm.shape": 2048, "model.layers.24.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.24.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.24.mlp.gate_proj.col_vals.shape": 752983, "model.layers.24.mlp.gate_proj.in_perm.shape": 2048, "model.layers.24.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.24.mlp.up_proj.row_offsets.shape": 11009, "model.layers.24.mlp.up_proj.col_vals.shape": 758555, "model.layers.24.mlp.up_proj.in_perm.shape": 2048, "model.layers.24.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.24.mlp.down_proj.row_offsets.shape": 4097, "model.layers.24.mlp.down_proj.col_vals.shape": 779069, "model.layers.24.mlp.down_proj.in_perm.shape": 5504, "model.layers.25.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.25.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.25.self_attn.q_proj.col_vals.shape": 284365, "model.layers.25.self_attn.q_proj.in_perm.shape": 2048, "model.layers.25.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.25.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.25.self_attn.k_proj.col_vals.shape": 280541, "model.layers.25.self_attn.k_proj.in_perm.shape": 2048, "model.layers.25.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.25.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.25.self_attn.v_proj.col_vals.shape": 293143, "model.layers.25.self_attn.v_proj.in_perm.shape": 2048, "model.layers.25.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.25.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.25.self_attn.o_proj.col_vals.shape": 278790, "model.layers.25.self_attn.o_proj.in_perm.shape": 2048, "model.layers.25.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.25.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.25.mlp.gate_proj.col_vals.shape": 751453, "model.layers.25.mlp.gate_proj.in_perm.shape": 2048, "model.layers.25.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.25.mlp.up_proj.row_offsets.shape": 11009, "model.layers.25.mlp.up_proj.col_vals.shape": 757286, "model.layers.25.mlp.up_proj.in_perm.shape": 2048, "model.layers.25.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.25.mlp.down_proj.row_offsets.shape": 4097, "model.layers.25.mlp.down_proj.col_vals.shape": 782955, "model.layers.25.mlp.down_proj.in_perm.shape": 5504, "model.layers.26.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.26.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.26.self_attn.q_proj.col_vals.shape": 284330, "model.layers.26.self_attn.q_proj.in_perm.shape": 2048, "model.layers.26.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.26.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.26.self_attn.k_proj.col_vals.shape": 277278, "model.layers.26.self_attn.k_proj.in_perm.shape": 2048, "model.layers.26.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.26.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.26.self_attn.v_proj.col_vals.shape": 294635, "model.layers.26.self_attn.v_proj.in_perm.shape": 2048, "model.layers.26.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.26.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.26.self_attn.o_proj.col_vals.shape": 275895, "model.layers.26.self_attn.o_proj.in_perm.shape": 2048, "model.layers.26.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.26.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.26.mlp.gate_proj.col_vals.shape": 747723, "model.layers.26.mlp.gate_proj.in_perm.shape": 2048, "model.layers.26.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.26.mlp.up_proj.row_offsets.shape": 11009, "model.layers.26.mlp.up_proj.col_vals.shape": 759279, "model.layers.26.mlp.up_proj.in_perm.shape": 2048, "model.layers.26.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.26.mlp.down_proj.row_offsets.shape": 4097, "model.layers.26.mlp.down_proj.col_vals.shape": 796751, "model.layers.26.mlp.down_proj.in_perm.shape": 5504, "model.layers.27.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.27.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.27.self_attn.q_proj.col_vals.shape": 282594, "model.layers.27.self_attn.q_proj.in_perm.shape": 2048, "model.layers.27.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.27.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.27.self_attn.k_proj.col_vals.shape": 279388, "model.layers.27.self_attn.k_proj.in_perm.shape": 2048, "model.layers.27.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.27.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.27.self_attn.v_proj.col_vals.shape": 289702, "model.layers.27.self_attn.v_proj.in_perm.shape": 2048, "model.layers.27.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.27.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.27.self_attn.o_proj.col_vals.shape": 279438, "model.layers.27.self_attn.o_proj.in_perm.shape": 2048, "model.layers.27.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.27.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.27.mlp.gate_proj.col_vals.shape": 744601, "model.layers.27.mlp.gate_proj.in_perm.shape": 2048, "model.layers.27.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.27.mlp.up_proj.row_offsets.shape": 11009, "model.layers.27.mlp.up_proj.col_vals.shape": 760423, "model.layers.27.mlp.up_proj.in_perm.shape": 2048, "model.layers.27.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.27.mlp.down_proj.row_offsets.shape": 4097, "model.layers.27.mlp.down_proj.col_vals.shape": 809936, "model.layers.27.mlp.down_proj.in_perm.shape": 5504, "model.layers.28.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.28.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.28.self_attn.q_proj.col_vals.shape": 283449, "model.layers.28.self_attn.q_proj.in_perm.shape": 2048, "model.layers.28.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.28.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.28.self_attn.k_proj.col_vals.shape": 280044, "model.layers.28.self_attn.k_proj.in_perm.shape": 2048, "model.layers.28.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.28.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.28.self_attn.v_proj.col_vals.shape": 289314, "model.layers.28.self_attn.v_proj.in_perm.shape": 2048, "model.layers.28.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.28.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.28.self_attn.o_proj.col_vals.shape": 272741, "model.layers.28.self_attn.o_proj.in_perm.shape": 2048, "model.layers.28.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.28.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.28.mlp.gate_proj.col_vals.shape": 741289, "model.layers.28.mlp.gate_proj.in_perm.shape": 2048, "model.layers.28.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.28.mlp.up_proj.row_offsets.shape": 11009, "model.layers.28.mlp.up_proj.col_vals.shape": 762307, "model.layers.28.mlp.up_proj.in_perm.shape": 2048, "model.layers.28.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.28.mlp.down_proj.row_offsets.shape": 4097, "model.layers.28.mlp.down_proj.col_vals.shape": 825477, "model.layers.28.mlp.down_proj.in_perm.shape": 5504, "model.layers.29.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.29.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.29.self_attn.q_proj.col_vals.shape": 281395, "model.layers.29.self_attn.q_proj.in_perm.shape": 2048, "model.layers.29.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.29.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.29.self_attn.k_proj.col_vals.shape": 276980, "model.layers.29.self_attn.k_proj.in_perm.shape": 2048, "model.layers.29.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.29.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.29.self_attn.v_proj.col_vals.shape": 292345, "model.layers.29.self_attn.v_proj.in_perm.shape": 2048, "model.layers.29.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.29.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.29.self_attn.o_proj.col_vals.shape": 276014, "model.layers.29.self_attn.o_proj.in_perm.shape": 2048, "model.layers.29.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.29.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.29.mlp.gate_proj.col_vals.shape": 737393, "model.layers.29.mlp.gate_proj.in_perm.shape": 2048, "model.layers.29.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.29.mlp.up_proj.row_offsets.shape": 11009, "model.layers.29.mlp.up_proj.col_vals.shape": 758464, "model.layers.29.mlp.up_proj.in_perm.shape": 2048, "model.layers.29.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.29.mlp.down_proj.row_offsets.shape": 4097, "model.layers.29.mlp.down_proj.col_vals.shape": 850037, "model.layers.29.mlp.down_proj.in_perm.shape": 5504, "model.layers.30.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.30.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.30.self_attn.q_proj.col_vals.shape": 281230, "model.layers.30.self_attn.q_proj.in_perm.shape": 2048, "model.layers.30.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.30.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.30.self_attn.k_proj.col_vals.shape": 277682, "model.layers.30.self_attn.k_proj.in_perm.shape": 2048, "model.layers.30.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.30.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.30.self_attn.v_proj.col_vals.shape": 287809, "model.layers.30.self_attn.v_proj.in_perm.shape": 2048, "model.layers.30.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.30.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.30.self_attn.o_proj.col_vals.shape": 277445, "model.layers.30.self_attn.o_proj.in_perm.shape": 2048, "model.layers.30.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.30.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.30.mlp.gate_proj.col_vals.shape": 721613, "model.layers.30.mlp.gate_proj.in_perm.shape": 2048, "model.layers.30.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.30.mlp.up_proj.row_offsets.shape": 11009, "model.layers.30.mlp.up_proj.col_vals.shape": 746115, "model.layers.30.mlp.up_proj.in_perm.shape": 2048, "model.layers.30.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.30.mlp.down_proj.row_offsets.shape": 4097, "model.layers.30.mlp.down_proj.col_vals.shape": 829475, "model.layers.30.mlp.down_proj.in_perm.shape": 5504, "model.layers.31.self_attn.q_proj.dense_weights.shape": 1048576, "model.layers.31.self_attn.q_proj.row_offsets.shape": 4097, "model.layers.31.self_attn.q_proj.col_vals.shape": 277274, "model.layers.31.self_attn.q_proj.in_perm.shape": 2048, "model.layers.31.self_attn.k_proj.dense_weights.shape": 1048576, "model.layers.31.self_attn.k_proj.row_offsets.shape": 4097, "model.layers.31.self_attn.k_proj.col_vals.shape": 274320, "model.layers.31.self_attn.k_proj.in_perm.shape": 2048, "model.layers.31.self_attn.v_proj.dense_weights.shape": 1048576, "model.layers.31.self_attn.v_proj.row_offsets.shape": 4097, "model.layers.31.self_attn.v_proj.col_vals.shape": 290918, "model.layers.31.self_attn.v_proj.in_perm.shape": 2048, "model.layers.31.self_attn.o_proj.dense_weights.shape": 1048576, "model.layers.31.self_attn.o_proj.row_offsets.shape": 4097, "model.layers.31.self_attn.o_proj.col_vals.shape": 266999, "model.layers.31.self_attn.o_proj.in_perm.shape": 2048, "model.layers.31.mlp.gate_proj.dense_weights.shape": 2818048, "model.layers.31.mlp.gate_proj.row_offsets.shape": 11009, "model.layers.31.mlp.gate_proj.col_vals.shape": 741720, "model.layers.31.mlp.gate_proj.in_perm.shape": 2048, "model.layers.31.mlp.up_proj.dense_weights.shape": 2818048, "model.layers.31.mlp.up_proj.row_offsets.shape": 11009, "model.layers.31.mlp.up_proj.col_vals.shape": 761614, "model.layers.31.mlp.up_proj.in_perm.shape": 2048, "model.layers.31.mlp.down_proj.dense_weights.shape": 2818048, "model.layers.31.mlp.down_proj.row_offsets.shape": 4097, "model.layers.31.mlp.down_proj.col_vals.shape": 997160, "model.layers.31.mlp.down_proj.in_perm.shape": 5504 } }, "_attn_implementation_autoset": false }