| layer,module,loss,samples,damp,time | |
| 0,mlp.gate_proj,0.0000019575,0.05000,2.282 | |
| 0,mlp.up_proj,0.0000027544,0.05000,2.295 | |
| 0,mlp.down_proj,0.0000010484,0.05000,3.485 | |
| 0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.133 | |
| 0,self_attn.k_proj,0.0000001276,0.05000,1.896 | |
| 0,self_attn.q_proj,0.0000026756,0.05000,1.900 | |
| 0,self_attn.v_proj,0.0000002852,0.05000,1.926 | |
| 0,self_attn.o_gate,0.0000056989,0.05000,0.416 | |
| 1,mlp.up_proj,0.0000019776,0.05000,2.523 | |
| 1,mlp.gate_proj,0.0000018764,0.05000,2.584 | |
| 1,mlp.down_proj,0.0000005803,0.05000,3.835 | |
| 1,self_attn.v_proj,0.0000274517,0.05000,3.357 | |
| 1,self_attn.o_proj,0.0000006033,0.05000,3.382 | |
| 1,self_attn.q_proj,0.0000212897,0.05000,3.414 | |
| 1,self_attn.k_proj,0.0000216100,0.05000,3.420 | |
| 2,mlp.gate_proj,0.0000023568,0.05000,2.246 | |
| 2,mlp.up_proj,0.0000023672,0.05000,2.256 | |
| 2,mlp.down_proj,0.0000010073,0.05000,3.446 | |
| 2,self_attn.q_proj,0.0000107203,0.05000,2.823 | |
| 2,self_attn.v_proj,0.0000126155,0.05000,2.848 | |
| 2,self_attn.o_proj,0.0000002600,0.05000,2.851 | |
| 2,self_attn.k_proj,0.0000106647,0.05000,2.854 | |
| 3,mlp.up_proj,0.0000042655,0.05000,2.150 | |
| 3,mlp.gate_proj,0.0000048281,0.05000,2.155 | |
| 3,mlp.down_proj,0.0000014951,0.05000,3.343 | |
| 3,self_attn.v_proj,0.0000143808,0.05000,2.827 | |
| 3,self_attn.q_proj,0.0000133468,0.05000,2.856 | |
| 3,self_attn.o_proj,0.0000003673,0.05000,2.865 | |
| 3,self_attn.k_proj,0.0000136954,0.05000,2.868 | |
| 4,mlp.up_proj,0.0000057488,0.05000,2.549 | |
| 4,mlp.gate_proj,0.0000063297,0.05000,2.552 | |
| 4,mlp.down_proj,0.0000016230,0.05000,3.892 | |
| 4,self_attn.q_proj,0.0000106056,0.05000,3.481 | |
| 4,self_attn.v_proj,0.0000128426,0.05000,3.494 | |
| 4,self_attn.k_proj,0.0000108418,0.05000,3.509 | |
| 4,self_attn.o_proj,0.0000002972,0.05000,3.519 | |
| 5,mlp.up_proj,0.0000071429,0.05000,2.613 | |
| 5,mlp.gate_proj,0.0000078490,0.05000,2.635 | |
| 5,mlp.down_proj,0.0000017132,0.05000,3.949 | |
| 5,self_attn.v_proj,0.0000129791,0.05000,3.126 | |
| 5,self_attn.k_proj,0.0000121584,0.05000,3.347 | |
| 5,self_attn.q_proj,0.0000115177,0.05000,3.356 | |
| 5,self_attn.o_proj,0.0000004817,0.05000,3.377 | |
| 6,mlp.up_proj,0.0000088197,0.05000,2.675 | |
| 6,mlp.gate_proj,0.0000095654,0.05000,2.748 | |
| 6,mlp.down_proj,0.0000019187,0.05000,3.957 | |
| 6,self_attn.o_proj,0.0000004831,0.05000,2.738 | |
| 6,self_attn.q_proj,0.0000111541,0.05000,2.795 | |
| 6,self_attn.k_proj,0.0000133217,0.05000,2.824 | |
| 6,self_attn.v_proj,0.0000116766,0.05000,2.834 | |
| 7,mlp.up_proj,0.0000092302,0.05000,1.289 | |
| 7,mlp.gate_proj,0.0000097952,0.05000,1.293 | |
| 7,mlp.down_proj,0.0000022242,0.05000,2.475 | |
| 7,self_attn.v_proj,0.0000109299,0.05000,2.075 | |
| 7,self_attn.k_proj,0.0000121253,0.05000,2.156 | |
| 7,self_attn.q_proj,0.0000099677,0.05000,2.286 | |
| 7,self_attn.o_proj,0.0000005160,0.05000,2.289 | |
| 8,mlp.gate_proj,0.0000104388,0.05000,1.974 | |
| 8,mlp.up_proj,0.0000095090,0.05000,2.055 | |
| 8,mlp.down_proj,0.0000021816,0.05000,3.211 | |
| 8,self_attn.k_proj,0.0000085051,0.05000,2.614 | |
| 8,self_attn.v_proj,0.0000075190,0.05000,2.743 | |
| 8,self_attn.o_proj,0.0000008019,0.05000,2.750 | |
| 8,self_attn.q_proj,0.0000069249,0.05000,2.772 | |
| 9,mlp.gate_proj,0.0000114569,0.05000,1.817 | |
| 9,mlp.up_proj,0.0000109181,0.05000,1.822 | |
| 9,mlp.down_proj,0.0000028727,0.05000,3.040 | |
| 9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.095 | |
| 9,self_attn.v_proj,0.0000005232,0.05000,1.285 | |
| 9,self_attn.k_proj,0.0000008164,0.05000,1.304 | |
| 9,self_attn.q_proj,0.0000084581,0.05000,1.328 | |
| 9,self_attn.o_gate,0.0000059179,0.05000,0.404 | |
| 10,mlp.up_proj,0.0000110046,0.05000,1.756 | |
| 10,mlp.gate_proj,0.0000113558,0.05000,1.940 | |
| 10,mlp.down_proj,0.0000026231,0.05000,3.068 | |
| 10,self_attn.q_proj,0.0000101732,0.05000,2.230 | |
| 10,self_attn.o_proj,0.0000007549,0.05000,2.262 | |
| 10,self_attn.k_proj,0.0000127262,0.05000,2.365 | |
| 10,self_attn.v_proj,0.0000108463,0.05000,2.387 | |
| 11,mlp.up_proj,0.0000111043,0.05000,2.183 | |
| 11,mlp.gate_proj,0.0000110840,0.05000,2.187 | |
| 11,mlp.down_proj,0.0000028018,0.05000,3.373 | |
| 11,self_attn.v_proj,0.0000078623,0.05000,2.592 | |
| 11,self_attn.k_proj,0.0000090132,0.05000,2.733 | |
| 11,self_attn.o_proj,0.0000008556,0.05000,2.759 | |
| 11,self_attn.q_proj,0.0000073984,0.05000,2.772 | |
| 12,mlp.gate_proj,0.0000107157,0.05000,2.134 | |
| 12,mlp.up_proj,0.0000109496,0.05000,2.144 | |
| 12,mlp.down_proj,0.0000028660,0.05000,3.327 | |
| 12,self_attn.v_proj,0.0000108852,0.05000,2.999 | |
| 12,self_attn.q_proj,0.0000093560,0.05000,3.022 | |
| 12,self_attn.o_proj,0.0000010866,0.05000,3.033 | |
| 12,self_attn.k_proj,0.0000102958,0.05000,3.039 | |
| 13,mlp.up_proj,0.0000113037,0.05000,2.134 | |
| 13,mlp.gate_proj,0.0000108564,0.05000,2.150 | |
| 13,mlp.down_proj,0.0000030354,0.05000,3.340 | |
| 13,self_attn.o_proj,0.0000011258,0.05000,2.630 | |
| 13,self_attn.q_proj,0.0000071202,0.05000,2.637 | |
| 13,self_attn.v_proj,0.0000072887,0.05000,2.655 | |
| 13,self_attn.k_proj,0.0000090965,0.05000,2.658 | |
| 14,mlp.gate_proj,0.0000102864,0.05000,2.116 | |
| 14,mlp.up_proj,0.0000112935,0.05000,2.131 | |
| 14,mlp.down_proj,0.0000033973,0.05000,3.311 | |
| 14,self_attn.q_proj,0.0000077342,0.05000,2.791 | |
| 14,self_attn.k_proj,0.0000088883,0.05000,2.801 | |
| 14,self_attn.o_proj,0.0000014352,0.05000,2.806 | |
| 14,self_attn.v_proj,0.0000076372,0.05000,2.810 | |
| 15,mlp.up_proj,0.0000109415,0.05000,2.113 | |
| 15,mlp.gate_proj,0.0000106577,0.05000,2.117 | |
| 15,mlp.down_proj,0.0000029832,0.05000,3.304 | |
| 15,self_attn.v_proj,0.0000053197,0.05000,2.740 | |
| 15,self_attn.k_proj,0.0000064845,0.05000,2.750 | |
| 15,self_attn.q_proj,0.0000050206,0.05000,2.756 | |
| 15,self_attn.o_proj,0.0000019126,0.05000,2.758 | |
| 16,mlp.up_proj,0.0000153010,0.05000,2.529 | |
| 16,mlp.gate_proj,0.0000156530,0.05000,2.533 | |
| 16,mlp.down_proj,0.0000057073,0.05000,3.721 | |
| 16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100 | |
| 16,self_attn.q_proj,0.0000078978,0.05000,1.962 | |
| 16,self_attn.k_proj,0.0000007718,0.05000,1.963 | |
| 16,self_attn.v_proj,0.0000004404,0.05000,1.968 | |
| 16,self_attn.o_gate,0.0000046133,0.05000,0.430 | |
| 17,mlp.gate_proj,0.0000172837,0.05000,2.243 | |
| 17,mlp.up_proj,0.0000157615,0.05000,2.252 | |
| 17,mlp.down_proj,0.0000047822,0.05000,3.442 | |
| 17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.103 | |
| 17,self_attn.q_proj,0.0000081782,0.05000,1.955 | |
| 17,self_attn.k_proj,0.0000007155,0.05000,1.958 | |
| 17,self_attn.v_proj,0.0000006458,0.05000,1.977 | |
| 17,self_attn.o_gate,0.0000054807,0.05000,0.410 | |
| 18,mlp.up_proj,0.0000136515,0.05000,2.710 | |
| 18,mlp.gate_proj,0.0000141853,0.05000,2.718 | |
| 18,mlp.down_proj,0.0000036975,0.05000,3.911 | |
| 18,self_attn.q_proj,0.0000070243,0.05000,3.069 | |
| 18,self_attn.k_proj,0.0000099378,0.05000,3.079 | |
| 18,self_attn.v_proj,0.0000070944,0.05000,3.091 | |
| 18,self_attn.o_proj,0.0000013901,0.05000,3.093 | |
| 19,mlp.up_proj,0.0000140069,0.05000,2.184 | |
| 19,mlp.gate_proj,0.0000137057,0.05000,2.188 | |
| 19,mlp.down_proj,0.0000041433,0.05000,3.382 | |
| 19,self_attn.v_proj,0.0000092466,0.05000,2.955 | |
| 19,self_attn.k_proj,0.0000115108,0.05000,2.976 | |
| 19,self_attn.q_proj,0.0000087993,0.05000,2.978 | |
| 19,self_attn.o_proj,0.0000013312,0.05000,2.981 | |
| 20,mlp.gate_proj,0.0000136300,0.05000,2.360 | |
| 20,mlp.up_proj,0.0000144002,0.05000,2.367 | |
| 20,mlp.down_proj,0.0000054458,0.05000,3.551 | |
| 20,self_attn.k_proj,0.0000093558,0.05000,2.430 | |
| 20,self_attn.v_proj,0.0000069220,0.05000,2.485 | |
| 20,self_attn.q_proj,0.0000069259,0.05000,2.488 | |
| 20,self_attn.o_proj,0.0000021510,0.05000,2.491 | |
| 21,mlp.up_proj,0.0000136941,0.05000,1.901 | |
| 21,mlp.gate_proj,0.0000127453,0.05000,2.069 | |
| 21,mlp.down_proj,0.0000064656,0.05000,3.211 | |
| 21,self_attn.o_proj,0.0000032435,0.05000,2.564 | |
| 21,self_attn.k_proj,0.0000082270,0.05000,2.652 | |
| 21,self_attn.q_proj,0.0000072292,0.05000,2.698 | |
| 21,self_attn.v_proj,0.0000077253,0.05000,2.702 | |
| 22,mlp.gate_proj,0.0000178435,0.05000,2.226 | |
| 22,mlp.up_proj,0.0000191504,0.05000,2.237 | |
| 22,mlp.down_proj,0.0000115692,0.05000,3.429 | |
| 22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.105 | |
| 22,self_attn.q_proj,0.0000098795,0.05000,1.564 | |
| 22,self_attn.v_proj,0.0000011001,0.05000,1.603 | |
| 22,self_attn.k_proj,0.0000006362,0.05000,1.802 | |
| 22,self_attn.o_gate,0.0000052467,0.05000,0.556 | |
| 23,mlp.up_proj,0.0000205944,0.05000,2.568 | |
| 23,mlp.gate_proj,0.0000191698,0.05000,2.574 | |
| 23,mlp.down_proj,0.0000114501,0.05000,3.838 | |
| 23,self_attn.k_proj,0.0000123230,0.05000,3.183 | |
| 23,self_attn.v_proj,0.0000092833,0.05000,3.241 | |
| 23,self_attn.q_proj,0.0000096182,0.05000,3.242 | |
| 23,self_attn.o_proj,0.0000029686,0.05000,3.246 | |
| 24,mlp.up_proj,0.0000243043,0.05000,2.174 | |
| 24,mlp.gate_proj,0.0000224462,0.05000,2.178 | |
| 24,mlp.down_proj,0.0000185971,0.05000,3.367 | |
| 24,self_attn.o_proj,0.0000049888,0.05000,2.975 | |
| 24,self_attn.v_proj,0.0000083148,0.05000,2.986 | |
| 24,self_attn.k_proj,0.0000101886,0.05000,2.996 | |
| 24,self_attn.q_proj,0.0000081873,0.05000,3.004 | |
| 25,mlp.gate_proj,0.0000260207,0.05000,2.247 | |
| 25,mlp.up_proj,0.0000286292,0.05000,2.265 | |
| 25,mlp.down_proj,0.0000327358,0.05000,3.458 | |
| 25,self_attn.k_proj,0.0000109056,0.05000,3.008 | |
| 25,self_attn.o_proj,0.0000064882,0.05000,3.029 | |
| 25,self_attn.v_proj,0.0000084491,0.05000,3.036 | |
| 25,self_attn.q_proj,0.0000085720,0.05000,3.038 | |
| 26,mlp.gate_proj,0.0000290269,0.05000,2.488 | |
| 26,mlp.up_proj,0.0000328542,0.05000,2.502 | |
| 26,mlp.down_proj,0.0000283380,0.05000,3.690 | |
| 26,self_attn.o_proj,0.0000122008,0.05000,2.982 | |
| 26,self_attn.k_proj,0.0000195720,0.05000,2.992 | |
| 26,self_attn.q_proj,0.0000137304,0.05000,2.997 | |
| 26,self_attn.v_proj,0.0000134189,0.05000,2.999 | |
| 27,mlp.up_proj,0.0000379276,0.05000,2.567 | |
| 27,mlp.gate_proj,0.0000329030,0.05000,2.567 | |
| 27,mlp.down_proj,0.0000369176,0.05000,3.824 | |
| 27,self_attn.v_proj,0.0000120097,0.05000,2.916 | |
| 27,self_attn.q_proj,0.0000128456,0.05000,2.921 | |
| 27,self_attn.o_proj,0.0000076826,0.05000,2.924 | |
| 27,self_attn.k_proj,0.0000141221,0.05000,2.928 | |
| 28,mlp.gate_proj,0.0000373178,0.05000,2.244 | |
| 28,mlp.up_proj,0.0000440524,0.05000,2.267 | |
| 28,mlp.down_proj,0.0000514231,0.05000,3.471 | |
| 28,self_attn.k_proj,0.0000228180,0.05000,3.043 | |
| 28,self_attn.v_proj,0.0000169691,0.05000,3.054 | |
| 28,self_attn.q_proj,0.0000164989,0.05000,3.059 | |
| 28,self_attn.o_proj,0.0000238898,0.05000,3.062 | |
| 29,mlp.up_proj,0.0000564963,0.05000,2.573 | |
| 29,mlp.gate_proj,0.0000464832,0.05000,2.598 | |
| 29,mlp.down_proj,0.0000851531,0.05000,3.805 | |
| 29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.113 | |
| 29,self_attn.v_proj,0.0000107679,0.05000,2.037 | |
| 29,self_attn.q_proj,0.0000211549,0.05000,2.046 | |
| 29,self_attn.k_proj,0.0000011045,0.05000,2.075 | |
| 29,self_attn.o_gate,0.0000234260,0.05000,0.413 | |
| 30,mlp.gate_proj,0.0000589105,0.05000,2.553 | |
| 30,mlp.up_proj,0.0000711039,0.05000,2.561 | |
| 30,mlp.down_proj,0.0001752567,0.05000,3.852 | |
| 30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.099 | |
| 30,self_attn.k_proj,0.0000012830,0.05000,2.137 | |
| 30,self_attn.q_proj,0.0000324827,0.05000,2.140 | |
| 30,self_attn.v_proj,0.0000334367,0.05000,2.190 | |
| 30,self_attn.o_gate,0.0000373244,0.05000,0.436 | |
| 31,mlp.up_proj,0.0001038903,0.05000,2.291 | |
| 31,mlp.gate_proj,0.0000930895,0.05000,2.361 | |
| 31,mlp.down_proj,0.0007491196,0.05000,3.548 | |
| 31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.107 | |
| 31,self_attn.k_proj,0.0000008259,0.05000,2.147 | |
| 31,self_attn.v_proj,0.0000051840,0.05000,2.153 | |
| 31,self_attn.q_proj,0.0000221746,0.05000,2.155 | |
| 31,self_attn.o_gate,0.0000232037,0.05000,0.409 | |