| layer,module,loss,samples,damp,time | |
| 0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.138 | |
| 0,self_attn.q_proj,0.0000025054,0.05000,1.933 | |
| 0,self_attn.v_proj,0.0000002673,0.05000,1.967 | |
| 0,self_attn.k_proj,0.0000001197,0.05000,1.976 | |
| 0,self_attn.o_gate,0.0000053353,0.05000,0.394 | |
| 0,mlp.up_proj,0.0000028122,0.05000,2.091 | |
| 0,mlp.gate_proj,0.0000019878,0.05000,2.116 | |
| 0,mlp.down_proj,0.0000014094,0.05000,3.193 | |
| 1,self_attn.q_proj,0.0000198860,0.05000,2.327 | |
| 1,self_attn.o_proj,0.0000005668,0.05000,2.348 | |
| 1,self_attn.k_proj,0.0000202024,0.05000,2.356 | |
| 1,self_attn.v_proj,0.0000256484,0.05000,2.360 | |
| 1,mlp.up_proj,0.0000018768,0.05000,1.999 | |
| 1,mlp.gate_proj,0.0000017809,0.05000,2.000 | |
| 1,mlp.down_proj,0.0000005710,0.05000,3.093 | |
| 2,self_attn.k_proj,0.0000100614,0.05000,2.109 | |
| 2,self_attn.q_proj,0.0000101246,0.05000,2.183 | |
| 2,self_attn.o_proj,0.0000002428,0.05000,2.218 | |
| 2,self_attn.v_proj,0.0000119164,0.05000,2.239 | |
| 2,mlp.gate_proj,0.0000022318,0.05000,1.951 | |
| 2,mlp.up_proj,0.0000022416,0.05000,2.021 | |
| 2,mlp.down_proj,0.0000009728,0.05000,3.080 | |
| 3,self_attn.q_proj,0.0000125645,0.05000,2.153 | |
| 3,self_attn.v_proj,0.0000135237,0.05000,2.170 | |
| 3,self_attn.o_proj,0.0000003379,0.05000,2.181 | |
| 3,self_attn.k_proj,0.0000128744,0.05000,2.214 | |
| 3,mlp.gate_proj,0.0000045511,0.05000,1.978 | |
| 3,mlp.up_proj,0.0000040209,0.05000,2.037 | |
| 3,mlp.down_proj,0.0000014397,0.05000,3.115 | |
| 4,self_attn.q_proj,0.0000099334,0.05000,2.707 | |
| 4,self_attn.v_proj,0.0000120185,0.05000,2.720 | |
| 4,self_attn.o_proj,0.0000002742,0.05000,2.739 | |
| 4,self_attn.k_proj,0.0000101482,0.05000,2.742 | |
| 4,mlp.up_proj,0.0000054010,0.05000,1.751 | |
| 4,mlp.gate_proj,0.0000059484,0.05000,1.881 | |
| 4,mlp.down_proj,0.0000015523,0.05000,2.930 | |
| 5,self_attn.q_proj,0.0000107690,0.05000,2.388 | |
| 5,self_attn.v_proj,0.0000121319,0.05000,2.565 | |
| 5,self_attn.k_proj,0.0000113703,0.05000,2.571 | |
| 5,self_attn.o_proj,0.0000004462,0.05000,2.596 | |
| 5,mlp.gate_proj,0.0000073629,0.05000,2.027 | |
| 5,mlp.up_proj,0.0000066999,0.05000,2.040 | |
| 5,mlp.down_proj,0.0000016318,0.05000,3.135 | |
| 6,self_attn.k_proj,0.0000124642,0.05000,2.605 | |
| 6,self_attn.q_proj,0.0000104139,0.05000,2.671 | |
| 6,self_attn.o_proj,0.0000004455,0.05000,2.673 | |
| 6,self_attn.v_proj,0.0000109036,0.05000,2.687 | |
| 6,mlp.up_proj,0.0000082335,0.05000,2.047 | |
| 6,mlp.gate_proj,0.0000089300,0.05000,2.052 | |
| 6,mlp.down_proj,0.0000018200,0.05000,3.152 | |
| 7,self_attn.k_proj,0.0000113131,0.05000,2.759 | |
| 7,self_attn.v_proj,0.0000101746,0.05000,2.777 | |
| 7,self_attn.o_proj,0.0000004802,0.05000,2.778 | |
| 7,self_attn.q_proj,0.0000092834,0.05000,2.784 | |
| 7,mlp.up_proj,0.0000086123,0.05000,2.016 | |
| 7,mlp.gate_proj,0.0000091405,0.05000,2.034 | |
| 7,mlp.down_proj,0.0000021072,0.05000,3.119 | |
| 8,self_attn.o_proj,0.0000007475,0.05000,2.422 | |
| 8,self_attn.v_proj,0.0000070005,0.05000,2.433 | |
| 8,self_attn.q_proj,0.0000064478,0.05000,2.495 | |
| 8,self_attn.k_proj,0.0000079277,0.05000,2.502 | |
| 8,mlp.up_proj,0.0000088850,0.05000,3.785 | |
| 8,mlp.gate_proj,0.0000097536,0.05000,3.857 | |
| 8,mlp.down_proj,0.0000020750,0.05000,6.063 | |
| 9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.102 | |
| 9,self_attn.q_proj,0.0000078441,0.05000,1.728 | |
| 9,self_attn.k_proj,0.0000007585,0.05000,1.730 | |
| 9,self_attn.v_proj,0.0000004848,0.05000,1.765 | |
| 9,self_attn.o_gate,0.0000054848,0.05000,0.385 | |
| 9,mlp.gate_proj,0.0000107061,0.05000,2.022 | |
| 9,mlp.up_proj,0.0000102051,0.05000,2.036 | |
| 9,mlp.down_proj,0.0000026359,0.05000,3.114 | |
| 10,self_attn.o_proj,0.0000006941,0.05000,2.442 | |
| 10,self_attn.k_proj,0.0000118222,0.05000,2.463 | |
| 10,self_attn.q_proj,0.0000094431,0.05000,2.511 | |
| 10,self_attn.v_proj,0.0000100697,0.05000,2.516 | |
| 10,mlp.up_proj,0.0000102459,0.05000,1.732 | |
| 10,mlp.gate_proj,0.0000105735,0.05000,1.865 | |
| 10,mlp.down_proj,0.0000024634,0.05000,2.896 | |
| 11,self_attn.q_proj,0.0000068717,0.05000,2.337 | |
| 11,self_attn.o_proj,0.0000007806,0.05000,2.350 | |
| 11,self_attn.v_proj,0.0000072992,0.05000,2.433 | |
| 11,self_attn.k_proj,0.0000083771,0.05000,2.435 | |
| 11,mlp.gate_proj,0.0000102914,0.05000,2.056 | |
| 11,mlp.up_proj,0.0000103112,0.05000,2.066 | |
| 11,mlp.down_proj,0.0000026183,0.05000,3.164 | |
| 12,self_attn.k_proj,0.0000095307,0.05000,2.719 | |
| 12,self_attn.o_proj,0.0000009991,0.05000,2.808 | |
| 12,self_attn.q_proj,0.0000086598,0.05000,2.811 | |
| 12,self_attn.v_proj,0.0000100696,0.05000,2.829 | |
| 12,mlp.up_proj,0.0000101439,0.05000,2.039 | |
| 12,mlp.gate_proj,0.0000099271,0.05000,2.045 | |
| 12,mlp.down_proj,0.0000026657,0.05000,3.134 | |
| 13,self_attn.o_proj,0.0000010317,0.05000,2.791 | |
| 13,self_attn.q_proj,0.0000065795,0.05000,2.790 | |
| 13,self_attn.v_proj,0.0000067338,0.05000,2.802 | |
| 13,self_attn.k_proj,0.0000084106,0.05000,2.804 | |
| 13,mlp.up_proj,0.0000104438,0.05000,2.054 | |
| 13,mlp.gate_proj,0.0000100330,0.05000,2.062 | |
| 13,mlp.down_proj,0.0000028064,0.05000,3.163 | |
| 14,self_attn.k_proj,0.0000081929,0.05000,2.847 | |
| 14,self_attn.o_proj,0.0000013027,0.05000,2.863 | |
| 14,self_attn.q_proj,0.0000071310,0.05000,2.869 | |
| 14,self_attn.v_proj,0.0000070400,0.05000,2.872 | |
| 14,mlp.gate_proj,0.0000094866,0.05000,1.740 | |
| 14,mlp.up_proj,0.0000104130,0.05000,1.846 | |
| 14,mlp.down_proj,0.0000031477,0.05000,2.910 | |
| 15,self_attn.o_proj,0.0000017505,0.05000,2.218 | |
| 15,self_attn.q_proj,0.0000046234,0.05000,2.263 | |
| 15,self_attn.v_proj,0.0000049011,0.05000,2.275 | |
| 15,self_attn.k_proj,0.0000059547,0.05000,2.337 | |
| 15,mlp.gate_proj,0.0000098456,0.05000,1.961 | |
| 15,mlp.up_proj,0.0000101065,0.05000,2.010 | |
| 15,mlp.down_proj,0.0000027747,0.05000,3.079 | |
| 16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100 | |
| 16,self_attn.q_proj,0.0000072823,0.05000,1.562 | |
| 16,self_attn.v_proj,0.0000004060,0.05000,1.615 | |
| 16,self_attn.k_proj,0.0000007123,0.05000,1.691 | |
| 16,self_attn.o_gate,0.0000042507,0.05000,0.451 | |
| 16,mlp.up_proj,0.0000126712,0.05000,1.675 | |
| 16,mlp.gate_proj,0.0000129596,0.05000,1.690 | |
| 16,mlp.down_proj,0.0000040638,0.05000,2.776 | |
| 17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.101 | |
| 17,self_attn.q_proj,0.0000075214,0.05000,1.732 | |
| 17,self_attn.v_proj,0.0000005942,0.05000,1.763 | |
| 17,self_attn.k_proj,0.0000006580,0.05000,1.767 | |
| 17,self_attn.o_gate,0.0000050420,0.05000,0.386 | |
| 17,mlp.gate_proj,0.0000151962,0.05000,1.814 | |
| 17,mlp.up_proj,0.0000138615,0.05000,1.927 | |
| 17,mlp.down_proj,0.0000039890,0.05000,2.982 | |
| 18,self_attn.k_proj,0.0000091951,0.05000,2.866 | |
| 18,self_attn.v_proj,0.0000065547,0.05000,2.887 | |
| 18,self_attn.o_proj,0.0000012725,0.05000,2.897 | |
| 18,self_attn.q_proj,0.0000064936,0.05000,2.902 | |
| 18,mlp.up_proj,0.0000126479,0.05000,2.038 | |
| 18,mlp.gate_proj,0.0000131430,0.05000,2.050 | |
| 18,mlp.down_proj,0.0000034282,0.05000,3.146 | |
| 19,self_attn.v_proj,0.0000085492,0.05000,4.782 | |
| 19,self_attn.q_proj,0.0000081404,0.05000,4.843 | |
| 19,self_attn.k_proj,0.0000106452,0.05000,4.891 | |
| 19,self_attn.o_proj,0.0000012228,0.05000,4.896 | |
| 19,mlp.gate_proj,0.0000127026,0.05000,2.054 | |
| 19,mlp.up_proj,0.0000129798,0.05000,2.060 | |
| 19,mlp.down_proj,0.0000038460,0.05000,3.153 | |
| 20,self_attn.o_proj,0.0000019678,0.05000,2.087 | |
| 20,self_attn.v_proj,0.0000063993,0.05000,2.091 | |
| 20,self_attn.k_proj,0.0000086538,0.05000,2.108 | |
| 20,self_attn.q_proj,0.0000064077,0.05000,2.143 | |
| 20,mlp.gate_proj,0.0000126178,0.05000,2.050 | |
| 20,mlp.up_proj,0.0000133294,0.05000,2.058 | |
| 20,mlp.down_proj,0.0000050352,0.05000,3.156 | |
| 21,self_attn.q_proj,0.0000066632,0.05000,2.744 | |
| 21,self_attn.o_proj,0.0000029935,0.05000,2.773 | |
| 21,self_attn.v_proj,0.0000071241,0.05000,2.776 | |
| 21,self_attn.k_proj,0.0000075829,0.05000,2.777 | |
| 21,mlp.gate_proj,0.0000117755,0.05000,2.060 | |
| 21,mlp.up_proj,0.0000126541,0.05000,2.066 | |
| 21,mlp.down_proj,0.0000059720,0.05000,3.155 | |
| 22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.110 | |
| 22,self_attn.q_proj,0.0000090866,0.05000,1.417 | |
| 22,self_attn.v_proj,0.0000010102,0.05000,1.445 | |
| 22,self_attn.k_proj,0.0000005854,0.05000,1.667 | |
| 22,self_attn.o_gate,0.0000048253,0.05000,0.779 | |
| 22,mlp.up_proj,0.0000161680,0.05000,2.067 | |
| 22,mlp.gate_proj,0.0000150658,0.05000,2.070 | |
| 22,mlp.down_proj,0.0000087456,0.05000,3.160 | |
| 23,self_attn.v_proj,0.0000085384,0.05000,2.781 | |
| 23,self_attn.k_proj,0.0000113444,0.05000,2.789 | |
| 23,self_attn.q_proj,0.0000088479,0.05000,2.795 | |
| 23,self_attn.o_proj,0.0000027456,0.05000,2.799 | |
| 23,mlp.gate_proj,0.0000176874,0.05000,2.017 | |
| 23,mlp.up_proj,0.0000190031,0.05000,2.016 | |
| 23,mlp.down_proj,0.0000105173,0.05000,3.106 | |
| 24,self_attn.o_proj,0.0000046072,0.05000,2.394 | |
| 24,self_attn.v_proj,0.0000076524,0.05000,2.431 | |
| 24,self_attn.q_proj,0.0000075387,0.05000,2.491 | |
| 24,self_attn.k_proj,0.0000093884,0.05000,2.491 | |
| 24,mlp.gate_proj,0.0000207282,0.05000,1.679 | |
| 24,mlp.up_proj,0.0000224395,0.05000,1.695 | |
| 24,mlp.down_proj,0.0000171202,0.05000,2.841 | |
| 25,self_attn.q_proj,0.0000078854,0.05000,2.658 | |
| 25,self_attn.o_proj,0.0000059637,0.05000,2.663 | |
| 25,self_attn.v_proj,0.0000077710,0.05000,2.721 | |
| 25,self_attn.k_proj,0.0000100391,0.05000,2.728 | |
| 25,mlp.gate_proj,0.0000239999,0.05000,2.067 | |
| 25,mlp.up_proj,0.0000264045,0.05000,2.088 | |
| 25,mlp.down_proj,0.0000301652,0.05000,3.176 | |
| 26,self_attn.q_proj,0.0000126346,0.05000,2.771 | |
| 26,self_attn.v_proj,0.0000123492,0.05000,2.776 | |
| 26,self_attn.o_proj,0.0000112689,0.05000,2.795 | |
| 26,self_attn.k_proj,0.0000180101,0.05000,2.801 | |
| 26,mlp.gate_proj,0.0000267217,0.05000,2.057 | |
| 26,mlp.up_proj,0.0000302445,0.05000,2.072 | |
| 26,mlp.down_proj,0.0000262709,0.05000,3.169 | |
| 27,self_attn.k_proj,0.0000130042,0.05000,4.944 | |
| 27,self_attn.v_proj,0.0000110681,0.05000,4.947 | |
| 27,self_attn.q_proj,0.0000118468,0.05000,4.956 | |
| 27,self_attn.o_proj,0.0000070263,0.05000,4.957 | |
| 27,mlp.up_proj,0.0000350091,0.05000,2.034 | |
| 27,mlp.gate_proj,0.0000303793,0.05000,2.047 | |
| 27,mlp.down_proj,0.0000345169,0.05000,3.155 | |
| 28,self_attn.v_proj,0.0000156234,0.05000,2.886 | |
| 28,self_attn.o_proj,0.0000215157,0.05000,2.916 | |
| 28,self_attn.q_proj,0.0000151926,0.05000,2.922 | |
| 28,self_attn.k_proj,0.0000210312,0.05000,2.927 | |
| 28,mlp.up_proj,0.0000407514,0.05000,1.989 | |
| 28,mlp.gate_proj,0.0000345202,0.05000,2.030 | |
| 28,mlp.down_proj,0.0000481835,0.05000,3.106 | |
| 29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.085 | |
| 29,self_attn.q_proj,0.0000194512,0.05000,1.404 | |
| 29,self_attn.v_proj,0.0000098854,0.05000,1.465 | |
| 29,self_attn.k_proj,0.0000010127,0.05000,1.586 | |
| 29,self_attn.o_gate,0.0000215202,0.05000,0.486 | |
| 29,mlp.gate_proj,0.0000427544,0.05000,1.730 | |
| 29,mlp.up_proj,0.0000519517,0.05000,1.899 | |
| 29,mlp.down_proj,0.0000784172,0.05000,2.944 | |
| 30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.114 | |
| 30,self_attn.q_proj,0.0000298560,0.05000,1.735 | |
| 30,self_attn.v_proj,0.0000307847,0.05000,1.778 | |
| 30,self_attn.k_proj,0.0000011827,0.05000,1.793 | |
| 30,self_attn.o_gate,0.0000342893,0.05000,0.404 | |
| 30,mlp.gate_proj,0.0000556245,0.05000,2.027 | |
| 30,mlp.up_proj,0.0000671554,0.05000,2.046 | |
| 30,mlp.down_proj,0.0001689895,0.05000,3.163 | |
| 31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.110 | |
| 31,self_attn.q_proj,0.0000205567,0.05000,1.736 | |
| 31,self_attn.k_proj,0.0000007648,0.05000,1.774 | |
| 31,self_attn.v_proj,0.0000048051,0.05000,1.782 | |
| 31,self_attn.o_gate,0.0000215028,0.05000,0.393 | |
| 31,mlp.gate_proj,0.0000875096,0.05000,3.228 | |
| 31,mlp.up_proj,0.0000976488,0.05000,3.286 | |
| 31,mlp.down_proj,0.0006907178,0.05000,5.370 | |