Yi-1.5-9B-4bit-gptq / quant_log.json
LRL
Init Model
2c235dd
raw
history blame contribute delete
No virus
34.4 kB
[{"layer": 1, "module": "self_attn.k_proj", "avg_loss": 7.52349126906622, "time": 1.1630947589874268}, {"layer": 1, "module": "self_attn.v_proj", "avg_loss": 0.08373475453210255, "time": 1.1010050773620605}, {"layer": 1, "module": "self_attn.q_proj", "avg_loss": 15.925281343005953, "time": 1.088221788406372}, {"layer": 1, "module": "self_attn.o_proj", "avg_loss": 0.001180047080630348, "time": 1.0577669143676758}, {"layer": 1, "module": "mlp.up_proj", "avg_loss": 3.072269015842014, "time": 1.111659049987793}, {"layer": 1, "module": "mlp.gate_proj", "avg_loss": 2.9739554268973216, "time": 1.0931346416473389}, {"layer": 1, "module": "mlp.down_proj", "avg_loss": 0.009709445257035513, "time": 2.9947032928466797}, {"layer": 2, "module": "self_attn.k_proj", "avg_loss": 0.6464218866257441, "time": 1.023256540298462}, {"layer": 2, "module": "self_attn.v_proj", "avg_loss": 0.07744942014179533, "time": 1.0639867782592773}, {"layer": 2, "module": "self_attn.q_proj", "avg_loss": 1.9714359101795016, "time": 1.0279955863952637}, {"layer": 2, "module": "self_attn.o_proj", "avg_loss": 0.0023630072200109087, "time": 0.9931063652038574}, {"layer": 2, "module": "mlp.up_proj", "avg_loss": 4.872817266555059, "time": 1.0277495384216309}, {"layer": 2, "module": "mlp.gate_proj", "avg_loss": 5.059110611204117, "time": 1.0357348918914795}, {"layer": 2, "module": "mlp.down_proj", "avg_loss": 0.10512986258854942, "time": 2.9946086406707764}, {"layer": 3, "module": "self_attn.k_proj", "avg_loss": 7.33825199187748, "time": 1.0770301818847656}, {"layer": 3, "module": "self_attn.v_proj", "avg_loss": 0.9383392333984375, "time": 1.0685534477233887}, {"layer": 3, "module": "self_attn.q_proj", "avg_loss": 23.23229786706349, "time": 1.0336403846740723}, {"layer": 3, "module": "self_attn.o_proj", "avg_loss": 0.060378823961530416, "time": 1.0090138912200928}, {"layer": 3, "module": "mlp.up_proj", "avg_loss": 16.608592199900794, "time": 1.041032075881958}, {"layer": 3, "module": "mlp.gate_proj", "avg_loss": 17.76344711061508, "time": 1.1240558624267578}, {"layer": 3, "module": "mlp.down_proj", "avg_loss": 0.1814044922117203, "time": 2.8854904174804688}, {"layer": 4, "module": "self_attn.k_proj", "avg_loss": 11.714502728174603, "time": 1.0484309196472168}, {"layer": 4, "module": "self_attn.v_proj", "avg_loss": 1.5227622380332342, "time": 1.0788679122924805}, {"layer": 4, "module": "self_attn.q_proj", "avg_loss": 38.741327194940474, "time": 1.0589616298675537}, {"layer": 4, "module": "self_attn.o_proj", "avg_loss": 0.04136672852531312, "time": 1.0718693733215332}, {"layer": 4, "module": "mlp.up_proj", "avg_loss": 29.166610475570437, "time": 1.1367878913879395}, {"layer": 4, "module": "mlp.gate_proj", "avg_loss": 31.6925533234127, "time": 1.1956281661987305}, {"layer": 4, "module": "mlp.down_proj", "avg_loss": 0.3683233109731523, "time": 2.849614381790161}, {"layer": 5, "module": "self_attn.k_proj", "avg_loss": 32.75880068824405, "time": 1.1274654865264893}, {"layer": 5, "module": "self_attn.v_proj", "avg_loss": 2.744386218843006, "time": 1.1131956577301025}, {"layer": 5, "module": "self_attn.q_proj", "avg_loss": 105.65990823412699, "time": 1.0806984901428223}, {"layer": 5, "module": "self_attn.o_proj", "avg_loss": 0.11138301425509983, "time": 1.054008960723877}, {"layer": 5, "module": "mlp.up_proj", "avg_loss": 41.939670138888886, "time": 1.1375303268432617}, {"layer": 5, "module": "mlp.gate_proj", "avg_loss": 46.05641586061508, "time": 1.1563236713409424}, {"layer": 5, "module": "mlp.down_proj", "avg_loss": 0.5417611258370536, "time": 3.1735482215881348}, {"layer": 6, "module": "self_attn.k_proj", "avg_loss": 25.098222036210316, "time": 1.0935032367706299}, {"layer": 6, "module": "self_attn.v_proj", "avg_loss": 4.355132572234623, "time": 1.0675392150878906}, {"layer": 6, "module": "self_attn.q_proj", "avg_loss": 100.22143167162699, "time": 1.1154658794403076}, {"layer": 6, "module": "self_attn.o_proj", "avg_loss": 0.11760309764317103, "time": 1.076556921005249}, {"layer": 6, "module": "mlp.up_proj", "avg_loss": 55.6280769469246, "time": 1.0871191024780273}, {"layer": 6, "module": "mlp.gate_proj", "avg_loss": 64.4370620969742, "time": 1.2686126232147217}, {"layer": 6, "module": "mlp.down_proj", "avg_loss": 0.9935060531374008, "time": 3.1090173721313477}, {"layer": 7, "module": "self_attn.k_proj", "avg_loss": 32.297731429811506, "time": 1.0584325790405273}, {"layer": 7, "module": "self_attn.v_proj", "avg_loss": 4.689338805183532, "time": 1.1655402183532715}, {"layer": 7, "module": "self_attn.q_proj", "avg_loss": 129.46330915178572, "time": 1.1890902519226074}, {"layer": 7, "module": "self_attn.o_proj", "avg_loss": 0.16881759583003939, "time": 1.2585082054138184}, {"layer": 7, "module": "mlp.up_proj", "avg_loss": 68.60016741071429, "time": 1.6254353523254395}, {"layer": 7, "module": "mlp.gate_proj", "avg_loss": 78.79722377232143, "time": 1.446023941040039}, {"layer": 7, "module": "mlp.down_proj", "avg_loss": 1.5680007934570312, "time": 3.6363916397094727}, {"layer": 8, "module": "self_attn.k_proj", "avg_loss": 44.36006479414682, "time": 1.0758891105651855}, {"layer": 8, "module": "self_attn.v_proj", "avg_loss": 5.234616234188988, "time": 1.0499072074890137}, {"layer": 8, "module": "self_attn.q_proj", "avg_loss": 158.43790302579364, "time": 1.1441864967346191}, {"layer": 8, "module": "self_attn.o_proj", "avg_loss": 0.4192157927013579, "time": 1.339003086090088}, {"layer": 8, "module": "mlp.up_proj", "avg_loss": 90.40180121527777, "time": 1.1430809497833252}, {"layer": 8, "module": "mlp.gate_proj", "avg_loss": 108.40038287450396, "time": 1.1591823101043701}, {"layer": 8, "module": "mlp.down_proj", "avg_loss": 2.562350802951389, "time": 3.3928768634796143}, {"layer": 9, "module": "self_attn.k_proj", "avg_loss": 46.55107576884921, "time": 1.072840929031372}, {"layer": 9, "module": "self_attn.v_proj", "avg_loss": 5.2358282180059526, "time": 1.0901210308074951}, {"layer": 9, "module": "self_attn.q_proj", "avg_loss": 175.38368055555554, "time": 1.0340874195098877}, {"layer": 9, "module": "self_attn.o_proj", "avg_loss": 0.5897783551897321, "time": 1.0512444972991943}, {"layer": 9, "module": "mlp.up_proj", "avg_loss": 120.03951202876983, "time": 1.0742559432983398}, {"layer": 9, "module": "mlp.gate_proj", "avg_loss": 137.30831473214286, "time": 1.0603156089782715}, {"layer": 9, "module": "mlp.down_proj", "avg_loss": 95.71980406746032, "time": 3.067931652069092}, {"layer": 10, "module": "self_attn.k_proj", "avg_loss": 55.36338975694444, "time": 1.022320032119751}, {"layer": 10, "module": "self_attn.v_proj", "avg_loss": 8.780309283544147, "time": 1.0267996788024902}, {"layer": 10, "module": "self_attn.q_proj", "avg_loss": 226.46206907242063, "time": 1.051985740661621}, {"layer": 10, "module": "self_attn.o_proj", "avg_loss": 0.612264905657087, "time": 1.0027201175689697}, {"layer": 10, "module": "mlp.up_proj", "avg_loss": 147.97820560515873, "time": 1.0629839897155762}, {"layer": 10, "module": "mlp.gate_proj", "avg_loss": 173.31767733134922, "time": 1.1273105144500732}, {"layer": 10, "module": "mlp.down_proj", "avg_loss": 4.891019790891617, "time": 2.8440380096435547}, {"layer": 11, "module": "self_attn.k_proj", "avg_loss": 48.90804811507937, "time": 1.0773625373840332}, {"layer": 11, "module": "self_attn.v_proj", "avg_loss": 7.6128423781622026, "time": 1.037905216217041}, {"layer": 11, "module": "self_attn.q_proj", "avg_loss": 202.04013206845238, "time": 1.004753589630127}, {"layer": 11, "module": "self_attn.o_proj", "avg_loss": 1.2833310081845237, "time": 1.024329423904419}, {"layer": 11, "module": "mlp.up_proj", "avg_loss": 174.1114986359127, "time": 1.0371954441070557}, {"layer": 11, "module": "mlp.gate_proj", "avg_loss": 199.97361731150792, "time": 1.0165190696716309}, {"layer": 11, "module": "mlp.down_proj", "avg_loss": 6.183013431609623, "time": 2.7859435081481934}, {"layer": 12, "module": "self_attn.k_proj", "avg_loss": 56.35047743055556, "time": 1.0019557476043701}, {"layer": 12, "module": "self_attn.v_proj", "avg_loss": 10.020750015500992, "time": 1.003159999847412}, {"layer": 12, "module": "self_attn.q_proj", "avg_loss": 275.38206845238096, "time": 0.9957282543182373}, {"layer": 12, "module": "self_attn.o_proj", "avg_loss": 1.4958492460704984, "time": 0.9998331069946289}, {"layer": 12, "module": "mlp.up_proj", "avg_loss": 203.31595672123015, "time": 1.0229260921478271}, {"layer": 12, "module": "mlp.gate_proj", "avg_loss": 235.36024305555554, "time": 1.075915813446045}, {"layer": 12, "module": "mlp.down_proj", "avg_loss": 8.37230476500496, "time": 3.007920503616333}, {"layer": 13, "module": "self_attn.k_proj", "avg_loss": 40.5962882874504, "time": 1.056520938873291}, {"layer": 13, "module": "self_attn.v_proj", "avg_loss": 10.291808113219245, "time": 1.024430513381958}, {"layer": 13, "module": "self_attn.q_proj", "avg_loss": 191.62673611111111, "time": 1.0386319160461426}, {"layer": 13, "module": "self_attn.o_proj", "avg_loss": 2.5307084340897816, "time": 1.138993740081787}, {"layer": 13, "module": "mlp.up_proj", "avg_loss": 210.779544890873, "time": 1.0474636554718018}, {"layer": 13, "module": "mlp.gate_proj", "avg_loss": 239.1571800595238, "time": 1.0308430194854736}, {"layer": 13, "module": "mlp.down_proj", "avg_loss": 10.236461821056547, "time": 2.972018241882324}, {"layer": 14, "module": "self_attn.k_proj", "avg_loss": 56.10951063368056, "time": 1.1398496627807617}, {"layer": 14, "module": "self_attn.v_proj", "avg_loss": 8.339837937127976, "time": 1.0517876148223877}, {"layer": 14, "module": "self_attn.q_proj", "avg_loss": 253.38023933531747, "time": 1.0432765483856201}, {"layer": 14, "module": "self_attn.o_proj", "avg_loss": 2.8181360638330855, "time": 1.095984935760498}, {"layer": 14, "module": "mlp.up_proj", "avg_loss": 220.8121589781746, "time": 1.1745338439941406}, {"layer": 14, "module": "mlp.gate_proj", "avg_loss": 241.32818700396825, "time": 1.0919160842895508}, {"layer": 14, "module": "mlp.down_proj", "avg_loss": 11.26571994357639, "time": 2.9433743953704834}, {"layer": 15, "module": "self_attn.k_proj", "avg_loss": 58.840692429315474, "time": 1.0389595031738281}, {"layer": 15, "module": "self_attn.v_proj", "avg_loss": 13.502135261656745, "time": 1.0254015922546387}, {"layer": 15, "module": "self_attn.q_proj", "avg_loss": 297.49131944444446, "time": 1.0251946449279785}, {"layer": 15, "module": "self_attn.o_proj", "avg_loss": 2.6048341781374007, "time": 1.0388894081115723}, {"layer": 15, "module": "mlp.up_proj", "avg_loss": 237.53087797619048, "time": 1.0519826412200928}, {"layer": 15, "module": "mlp.gate_proj", "avg_loss": 253.08072916666666, "time": 1.0490944385528564}, {"layer": 15, "module": "mlp.down_proj", "avg_loss": 12.998457651289682, "time": 2.8661410808563232}, {"layer": 16, "module": "self_attn.k_proj", "avg_loss": 53.72779482886905, "time": 1.0643634796142578}, {"layer": 16, "module": "self_attn.v_proj", "avg_loss": 14.790426587301587, "time": 1.0377209186553955}, {"layer": 16, "module": "self_attn.q_proj", "avg_loss": 267.9432353670635, "time": 1.067474365234375}, {"layer": 16, "module": "self_attn.o_proj", "avg_loss": 4.112435477120536, "time": 1.0737264156341553}, {"layer": 16, "module": "mlp.up_proj", "avg_loss": 262.40252976190476, "time": 1.117429256439209}, {"layer": 16, "module": "mlp.gate_proj", "avg_loss": 274.0640500992063, "time": 1.1759240627288818}, {"layer": 16, "module": "mlp.down_proj", "avg_loss": 14.079518151661706, "time": 2.986429452896118}, {"layer": 17, "module": "self_attn.k_proj", "avg_loss": 56.540062313988095, "time": 1.0962607860565186}, {"layer": 17, "module": "self_attn.v_proj", "avg_loss": 16.60202946738591, "time": 1.0783686637878418}, {"layer": 17, "module": "self_attn.q_proj", "avg_loss": 290.79296875, "time": 1.0680320262908936}, {"layer": 17, "module": "self_attn.o_proj", "avg_loss": 3.7848709348648315, "time": 1.0963997840881348}, {"layer": 17, "module": "mlp.up_proj", "avg_loss": 277.12196180555554, "time": 1.1517152786254883}, {"layer": 17, "module": "mlp.gate_proj", "avg_loss": 284.26488095238096, "time": 1.0294015407562256}, {"layer": 17, "module": "mlp.down_proj", "avg_loss": 16.143407428075395, "time": 2.820133924484253}, {"layer": 18, "module": "self_attn.k_proj", "avg_loss": 61.497043185763886, "time": 1.0356614589691162}, {"layer": 18, "module": "self_attn.v_proj", "avg_loss": 16.967451791914684, "time": 1.0018455982208252}, {"layer": 18, "module": "self_attn.q_proj", "avg_loss": 324.53772941468253, "time": 0.9974510669708252}, {"layer": 18, "module": "self_attn.o_proj", "avg_loss": 5.34582035125248, "time": 1.009718656539917}, {"layer": 18, "module": "mlp.up_proj", "avg_loss": 274.561042906746, "time": 1.0596230030059814}, {"layer": 18, "module": "mlp.gate_proj", "avg_loss": 271.2317708333333, "time": 1.082549810409546}, {"layer": 18, "module": "mlp.down_proj", "avg_loss": 17.576241629464285, "time": 3.0855305194854736}, {"layer": 19, "module": "self_attn.k_proj", "avg_loss": 56.947335379464285, "time": 1.0334131717681885}, {"layer": 19, "module": "self_attn.v_proj", "avg_loss": 21.756975446428573, "time": 1.0160613059997559}, {"layer": 19, "module": "self_attn.q_proj", "avg_loss": 315.72867063492066, "time": 1.0155396461486816}, {"layer": 19, "module": "self_attn.o_proj", "avg_loss": 6.6957901243179565, "time": 1.0783989429473877}, {"layer": 19, "module": "mlp.up_proj", "avg_loss": 280.0032552083333, "time": 1.137660264968872}, {"layer": 19, "module": "mlp.gate_proj", "avg_loss": 278.46440972222223, "time": 1.1042287349700928}, {"layer": 19, "module": "mlp.down_proj", "avg_loss": 19.60300796750992, "time": 3.185258388519287}, {"layer": 20, "module": "self_attn.k_proj", "avg_loss": 57.43148173983135, "time": 1.123023509979248}, {"layer": 20, "module": "self_attn.v_proj", "avg_loss": 25.515574621775794, "time": 1.1184489727020264}, {"layer": 20, "module": "self_attn.q_proj", "avg_loss": 311.0196552579365, "time": 1.083491563796997}, {"layer": 20, "module": "self_attn.o_proj", "avg_loss": 6.005231584821429, "time": 1.0571365356445312}, {"layer": 20, "module": "mlp.up_proj", "avg_loss": 312.4563492063492, "time": 1.1487329006195068}, {"layer": 20, "module": "mlp.gate_proj", "avg_loss": 320.8627232142857, "time": 1.1310412883758545}, {"layer": 20, "module": "mlp.down_proj", "avg_loss": 19.738385881696427, "time": 2.8906917572021484}, {"layer": 21, "module": "self_attn.k_proj", "avg_loss": 65.1660388764881, "time": 1.2152392864227295}, {"layer": 21, "module": "self_attn.v_proj", "avg_loss": 17.67267330109127, "time": 1.1149086952209473}, {"layer": 21, "module": "self_attn.q_proj", "avg_loss": 319.63182043650795, "time": 1.0196852684020996}, {"layer": 21, "module": "self_attn.o_proj", "avg_loss": 4.277507479228671, "time": 1.0589771270751953}, {"layer": 21, "module": "mlp.up_proj", "avg_loss": 313.3365885416667, "time": 1.0940759181976318}, {"layer": 21, "module": "mlp.gate_proj", "avg_loss": 325.1614273313492, "time": 1.1137669086456299}, {"layer": 21, "module": "mlp.down_proj", "avg_loss": 18.43777126736111, "time": 2.8516950607299805}, {"layer": 22, "module": "self_attn.k_proj", "avg_loss": 65.29079861111111, "time": 1.0320312976837158}, {"layer": 22, "module": "self_attn.v_proj", "avg_loss": 19.82035900297619, "time": 1.0296845436096191}, {"layer": 22, "module": "self_attn.q_proj", "avg_loss": 308.25396825396825, "time": 1.0083372592926025}, {"layer": 22, "module": "self_attn.o_proj", "avg_loss": 3.2406744578528026, "time": 1.0261244773864746}, {"layer": 22, "module": "mlp.up_proj", "avg_loss": 298.8736979166667, "time": 1.0576684474945068}, {"layer": 22, "module": "mlp.gate_proj", "avg_loss": 314.6656746031746, "time": 1.0390617847442627}, {"layer": 22, "module": "mlp.down_proj", "avg_loss": 15.422068762400794, "time": 2.8770341873168945}, {"layer": 23, "module": "self_attn.k_proj", "avg_loss": 64.84402901785714, "time": 1.0294642448425293}, {"layer": 23, "module": "self_attn.v_proj", "avg_loss": 21.01715766059028, "time": 1.019596815109253}, {"layer": 23, "module": "self_attn.q_proj", "avg_loss": 314.2331039186508, "time": 1.0858991146087646}, {"layer": 23, "module": "self_attn.o_proj", "avg_loss": 3.716708228701637, "time": 1.3629579544067383}, {"layer": 23, "module": "mlp.up_proj", "avg_loss": 297.57415674603175, "time": 1.3015711307525635}, {"layer": 23, "module": "mlp.gate_proj", "avg_loss": 301.7003968253968, "time": 1.5459051132202148}, {"layer": 23, "module": "mlp.down_proj", "avg_loss": 12.987072172619047, "time": 3.6574902534484863}, {"layer": 24, "module": "self_attn.k_proj", "avg_loss": 46.403099423363095, "time": 1.1384117603302002}, {"layer": 24, "module": "self_attn.v_proj", "avg_loss": 28.594277033730158, "time": 1.1221301555633545}, {"layer": 24, "module": "self_attn.q_proj", "avg_loss": 252.2778087797619, "time": 1.0768373012542725}, {"layer": 24, "module": "self_attn.o_proj", "avg_loss": 1.5698563106476315, "time": 1.1625072956085205}, {"layer": 24, "module": "mlp.up_proj", "avg_loss": 277.08426339285717, "time": 1.1091482639312744}, {"layer": 24, "module": "mlp.gate_proj", "avg_loss": 275.1543898809524, "time": 1.1190366744995117}, {"layer": 24, "module": "mlp.down_proj", "avg_loss": 10.153656490265377, "time": 3.0587987899780273}, {"layer": 25, "module": "self_attn.k_proj", "avg_loss": 44.53843470982143, "time": 1.0773849487304688}, {"layer": 25, "module": "self_attn.v_proj", "avg_loss": 8.323689778645834, "time": 1.1772735118865967}, {"layer": 25, "module": "self_attn.q_proj", "avg_loss": 167.6624968998016, "time": 1.1399331092834473}, {"layer": 25, "module": "self_attn.o_proj", "avg_loss": 2.2654622395833335, "time": 1.072944164276123}, {"layer": 25, "module": "mlp.up_proj", "avg_loss": 204.52542162698413, "time": 1.1676928997039795}, {"layer": 25, "module": "mlp.gate_proj", "avg_loss": 206.36635044642858, "time": 1.2079708576202393}, {"layer": 25, "module": "mlp.down_proj", "avg_loss": 7.261378212580605, "time": 3.102728843688965}, {"layer": 26, "module": "self_attn.k_proj", "avg_loss": 57.22313678075397, "time": 1.0960547924041748}, {"layer": 26, "module": "self_attn.v_proj", "avg_loss": 11.811349051339286, "time": 1.0557506084442139}, {"layer": 26, "module": "self_attn.q_proj", "avg_loss": 229.7282521081349, "time": 1.0686531066894531}, {"layer": 26, "module": "self_attn.o_proj", "avg_loss": 1.8469502282521082, "time": 1.105036735534668}, {"layer": 26, "module": "mlp.up_proj", "avg_loss": 200.59716021825398, "time": 1.1922357082366943}, {"layer": 26, "module": "mlp.gate_proj", "avg_loss": 210.05530753968253, "time": 1.160167932510376}, {"layer": 26, "module": "mlp.down_proj", "avg_loss": 8.804235064794147, "time": 3.23789119720459}, {"layer": 27, "module": "self_attn.k_proj", "avg_loss": 55.712038070436506, "time": 1.1707193851470947}, {"layer": 27, "module": "self_attn.v_proj", "avg_loss": 10.864472888764881, "time": 1.2090461254119873}, {"layer": 27, "module": "self_attn.q_proj", "avg_loss": 224.89403521825398, "time": 1.1479313373565674}, {"layer": 27, "module": "self_attn.o_proj", "avg_loss": 3.2796790713355657, "time": 1.2092041969299316}, {"layer": 27, "module": "mlp.up_proj", "avg_loss": 207.97811259920636, "time": 1.2634170055389404}, {"layer": 27, "module": "mlp.gate_proj", "avg_loss": 213.8538566468254, "time": 1.1929430961608887}, {"layer": 27, "module": "mlp.down_proj", "avg_loss": 9.026755681113592, "time": 3.374363660812378}, {"layer": 28, "module": "self_attn.k_proj", "avg_loss": 68.0913318452381, "time": 1.1452600955963135}, {"layer": 28, "module": "self_attn.v_proj", "avg_loss": 13.143016028025794, "time": 1.1598966121673584}, {"layer": 28, "module": "self_attn.q_proj", "avg_loss": 314.308345734127, "time": 1.2207612991333008}, {"layer": 28, "module": "self_attn.o_proj", "avg_loss": 2.6906602647569446, "time": 1.2889342308044434}, {"layer": 28, "module": "mlp.up_proj", "avg_loss": 228.7674541170635, "time": 1.2556931972503662}, {"layer": 28, "module": "mlp.gate_proj", "avg_loss": 234.24262152777777, "time": 1.2476210594177246}, {"layer": 28, "module": "mlp.down_proj", "avg_loss": 10.512238033234127, "time": 3.3353371620178223}, {"layer": 29, "module": "self_attn.k_proj", "avg_loss": 49.09798952132937, "time": 1.2062287330627441}, {"layer": 29, "module": "self_attn.v_proj", "avg_loss": 11.920479910714286, "time": 1.1428861618041992}, {"layer": 29, "module": "self_attn.q_proj", "avg_loss": 211.96670386904762, "time": 1.2172672748565674}, {"layer": 29, "module": "self_attn.o_proj", "avg_loss": 2.8466879224020336, "time": 1.1749467849731445}, {"layer": 29, "module": "mlp.up_proj", "avg_loss": 233.6137462797619, "time": 1.2748653888702393}, {"layer": 29, "module": "mlp.gate_proj", "avg_loss": 234.89536830357142, "time": 1.2958037853240967}, {"layer": 29, "module": "mlp.down_proj", "avg_loss": 11.879153297061013, "time": 3.024456024169922}, {"layer": 30, "module": "self_attn.k_proj", "avg_loss": 67.4972408234127, "time": 1.1801671981811523}, {"layer": 30, "module": "self_attn.v_proj", "avg_loss": 9.56280033172123, "time": 1.1655683517456055}, {"layer": 30, "module": "self_attn.q_proj", "avg_loss": 274.40736607142856, "time": 1.1351878643035889}, {"layer": 30, "module": "self_attn.o_proj", "avg_loss": 3.709384494357639, "time": 1.2057774066925049}, {"layer": 30, "module": "mlp.up_proj", "avg_loss": 238.2511470734127, "time": 1.1626017093658447}, {"layer": 30, "module": "mlp.gate_proj", "avg_loss": 233.25390625, "time": 1.1402926445007324}, {"layer": 30, "module": "mlp.down_proj", "avg_loss": 12.208286830357142, "time": 3.218200206756592}, {"layer": 31, "module": "self_attn.k_proj", "avg_loss": 65.59971788194444, "time": 1.2051424980163574}, {"layer": 31, "module": "self_attn.v_proj", "avg_loss": 16.00059097532242, "time": 1.144580602645874}, {"layer": 31, "module": "self_attn.q_proj", "avg_loss": 315.8028273809524, "time": 1.1337432861328125}, {"layer": 31, "module": "self_attn.o_proj", "avg_loss": 3.0362296937003967, "time": 1.1752159595489502}, {"layer": 31, "module": "mlp.up_proj", "avg_loss": 251.59502108134922, "time": 1.1779651641845703}, {"layer": 31, "module": "mlp.gate_proj", "avg_loss": 244.93560887896825, "time": 1.2300786972045898}, {"layer": 31, "module": "mlp.down_proj", "avg_loss": 13.784346323164682, "time": 3.2940175533294678}, {"layer": 32, "module": "self_attn.k_proj", "avg_loss": 59.26835317460318, "time": 1.1966123580932617}, {"layer": 32, "module": "self_attn.v_proj", "avg_loss": 17.130985320560516, "time": 1.1872398853302002}, {"layer": 32, "module": "self_attn.q_proj", "avg_loss": 280.87441096230157, "time": 1.1585309505462646}, {"layer": 32, "module": "self_attn.o_proj", "avg_loss": 4.106230236235119, "time": 1.1031668186187744}, {"layer": 32, "module": "mlp.up_proj", "avg_loss": 276.12152777777777, "time": 1.1782047748565674}, {"layer": 32, "module": "mlp.gate_proj", "avg_loss": 262.2576884920635, "time": 1.2049543857574463}, {"layer": 32, "module": "mlp.down_proj", "avg_loss": 15.278709774925595, "time": 3.161099672317505}, {"layer": 33, "module": "self_attn.k_proj", "avg_loss": 60.953396267361114, "time": 1.116408348083496}, {"layer": 33, "module": "self_attn.v_proj", "avg_loss": 17.971575055803573, "time": 1.113811731338501}, {"layer": 33, "module": "self_attn.q_proj", "avg_loss": 298.7875744047619, "time": 1.1659135818481445}, {"layer": 33, "module": "self_attn.o_proj", "avg_loss": 4.0333617679656495, "time": 1.16306471824646}, {"layer": 33, "module": "mlp.up_proj", "avg_loss": 289.92181299603175, "time": 1.2376830577850342}, {"layer": 33, "module": "mlp.gate_proj", "avg_loss": 273.39251612103175, "time": 1.1068203449249268}, {"layer": 33, "module": "mlp.down_proj", "avg_loss": 19.963070824032737, "time": 3.4880740642547607}, {"layer": 34, "module": "self_attn.k_proj", "avg_loss": 61.39093501984127, "time": 1.1488792896270752}, {"layer": 34, "module": "self_attn.v_proj", "avg_loss": 18.561883835565474, "time": 1.1890428066253662}, {"layer": 34, "module": "self_attn.q_proj", "avg_loss": 322.77597966269843, "time": 1.2030396461486816}, {"layer": 34, "module": "self_attn.o_proj", "avg_loss": 6.396072629898313, "time": 1.1853256225585938}, {"layer": 34, "module": "mlp.up_proj", "avg_loss": 297.66778273809524, "time": 1.2051937580108643}, {"layer": 34, "module": "mlp.gate_proj", "avg_loss": 270.3607700892857, "time": 1.2082383632659912}, {"layer": 34, "module": "mlp.down_proj", "avg_loss": 26.59624759734623, "time": 3.240910768508911}, {"layer": 35, "module": "self_attn.k_proj", "avg_loss": 57.025661892361114, "time": 1.0972023010253906}, {"layer": 35, "module": "self_attn.v_proj", "avg_loss": 25.24734157986111, "time": 1.0558063983917236}, {"layer": 35, "module": "self_attn.q_proj", "avg_loss": 318.19211929563494, "time": 1.1481974124908447}, {"layer": 35, "module": "self_attn.o_proj", "avg_loss": 8.037232414124505, "time": 1.1057744026184082}, {"layer": 35, "module": "mlp.up_proj", "avg_loss": 326.1161644345238, "time": 1.2063822746276855}, {"layer": 35, "module": "mlp.gate_proj", "avg_loss": 293.6860739087302, "time": 1.0919268131256104}, {"layer": 35, "module": "mlp.down_proj", "avg_loss": 35.46416558159722, "time": 2.9987568855285645}, {"layer": 36, "module": "self_attn.k_proj", "avg_loss": 60.67793976314484, "time": 1.2392117977142334}, {"layer": 36, "module": "self_attn.v_proj", "avg_loss": 29.76342579675099, "time": 1.1458733081817627}, {"layer": 36, "module": "self_attn.q_proj", "avg_loss": 320.2403273809524, "time": 1.1857309341430664}, {"layer": 36, "module": "self_attn.o_proj", "avg_loss": 8.933012462797619, "time": 1.2416067123413086}, {"layer": 36, "module": "mlp.up_proj", "avg_loss": 390.06972346230157, "time": 1.1489057540893555}, {"layer": 36, "module": "mlp.gate_proj", "avg_loss": 358.88250248015873, "time": 1.1012389659881592}, {"layer": 36, "module": "mlp.down_proj", "avg_loss": 44.235630580357146, "time": 3.0453708171844482}, {"layer": 37, "module": "self_attn.k_proj", "avg_loss": 62.75977337549603, "time": 1.114856243133545}, {"layer": 37, "module": "self_attn.v_proj", "avg_loss": 26.252038380456348, "time": 1.1514129638671875}, {"layer": 37, "module": "self_attn.q_proj", "avg_loss": 333.6838107638889, "time": 1.1166694164276123}, {"layer": 37, "module": "self_attn.o_proj", "avg_loss": 6.629325261191716, "time": 1.147444486618042}, {"layer": 37, "module": "mlp.up_proj", "avg_loss": 443.1982886904762, "time": 1.1109850406646729}, {"layer": 37, "module": "mlp.gate_proj", "avg_loss": 413.88504464285717, "time": 1.1010398864746094}, {"layer": 37, "module": "mlp.down_proj", "avg_loss": 62.08853391617063, "time": 3.054128408432007}, {"layer": 38, "module": "self_attn.k_proj", "avg_loss": 61.63746667286706, "time": 1.2461259365081787}, {"layer": 38, "module": "self_attn.v_proj", "avg_loss": 31.39532955109127, "time": 1.1086301803588867}, {"layer": 38, "module": "self_attn.q_proj", "avg_loss": 333.8924541170635, "time": 1.1449623107910156}, {"layer": 38, "module": "self_attn.o_proj", "avg_loss": 7.682955787295387, "time": 1.163386344909668}, {"layer": 38, "module": "mlp.up_proj", "avg_loss": 487.459232390873, "time": 1.1549019813537598}, {"layer": 38, "module": "mlp.gate_proj", "avg_loss": 467.4761904761905, "time": 1.1277477741241455}, {"layer": 38, "module": "mlp.down_proj", "avg_loss": 80.21190631200396, "time": 3.0373194217681885}, {"layer": 39, "module": "self_attn.k_proj", "avg_loss": 60.98892066592262, "time": 1.1146299839019775}, {"layer": 39, "module": "self_attn.v_proj", "avg_loss": 44.13232421875, "time": 1.1228981018066406}, {"layer": 39, "module": "self_attn.q_proj", "avg_loss": 357.1535218253968, "time": 1.1391727924346924}, {"layer": 39, "module": "self_attn.o_proj", "avg_loss": 14.891145252046131, "time": 1.1029539108276367}, {"layer": 39, "module": "mlp.up_proj", "avg_loss": 587.7054191468254, "time": 1.1430373191833496}, {"layer": 39, "module": "mlp.gate_proj", "avg_loss": 557.6582341269841, "time": 1.1813938617706299}, {"layer": 39, "module": "mlp.down_proj", "avg_loss": 123.22123015873017, "time": 3.444505453109741}, {"layer": 40, "module": "self_attn.k_proj", "avg_loss": 60.42896670386905, "time": 1.1138770580291748}, {"layer": 40, "module": "self_attn.v_proj", "avg_loss": 57.14385695684524, "time": 1.1392202377319336}, {"layer": 40, "module": "self_attn.q_proj", "avg_loss": 392.52675471230157, "time": 1.1060245037078857}, {"layer": 40, "module": "self_attn.o_proj", "avg_loss": 14.749141632564484, "time": 1.1186439990997314}, {"layer": 40, "module": "mlp.up_proj", "avg_loss": 661.8585069444445, "time": 1.1849796772003174}, {"layer": 40, "module": "mlp.gate_proj", "avg_loss": 623.1515376984127, "time": 1.1035432815551758}, {"layer": 40, "module": "mlp.down_proj", "avg_loss": 150.149166046627, "time": 3.11645770072937}, {"layer": 41, "module": "self_attn.k_proj", "avg_loss": 60.042992001488095, "time": 1.1098363399505615}, {"layer": 41, "module": "self_attn.v_proj", "avg_loss": 66.02926587301587, "time": 1.1204557418823242}, {"layer": 41, "module": "self_attn.q_proj", "avg_loss": 387.88194444444446, "time": 1.1130077838897705}, {"layer": 41, "module": "self_attn.o_proj", "avg_loss": 23.6735103546627, "time": 1.1380350589752197}, {"layer": 41, "module": "mlp.up_proj", "avg_loss": 753.8501364087301, "time": 1.1195154190063477}, {"layer": 41, "module": "mlp.gate_proj", "avg_loss": 698.6078249007936, "time": 1.1038756370544434}, {"layer": 41, "module": "mlp.down_proj", "avg_loss": 202.970703125, "time": 3.285126209259033}, {"layer": 42, "module": "self_attn.k_proj", "avg_loss": 55.17739722842262, "time": 1.1255366802215576}, {"layer": 42, "module": "self_attn.v_proj", "avg_loss": 99.33572048611111, "time": 1.153285264968872}, {"layer": 42, "module": "self_attn.q_proj", "avg_loss": 397.8693576388889, "time": 1.1155972480773926}, {"layer": 42, "module": "self_attn.o_proj", "avg_loss": 25.510689871651785, "time": 1.2227964401245117}, {"layer": 42, "module": "mlp.up_proj", "avg_loss": 821.5654761904761, "time": 1.1283934116363525}, {"layer": 42, "module": "mlp.gate_proj", "avg_loss": 755.2321428571429, "time": 1.0959265232086182}, {"layer": 42, "module": "mlp.down_proj", "avg_loss": 250.56684027777777, "time": 2.994374990463257}, {"layer": 43, "module": "self_attn.k_proj", "avg_loss": 65.62019469246032, "time": 1.1367888450622559}, {"layer": 43, "module": "self_attn.v_proj", "avg_loss": 103.57807849702381, "time": 1.1497259140014648}, {"layer": 43, "module": "self_attn.q_proj", "avg_loss": 433.49110243055554, "time": 1.1307244300842285}, {"layer": 43, "module": "self_attn.o_proj", "avg_loss": 27.93227229042659, "time": 1.0953280925750732}, {"layer": 43, "module": "mlp.up_proj", "avg_loss": 936.156498015873, "time": 1.223813533782959}, {"layer": 43, "module": "mlp.gate_proj", "avg_loss": 842.6703869047619, "time": 1.1545615196228027}, {"layer": 43, "module": "mlp.down_proj", "avg_loss": 307.3566468253968, "time": 3.076416015625}, {"layer": 44, "module": "self_attn.k_proj", "avg_loss": 64.67856367807539, "time": 1.148991584777832}, {"layer": 44, "module": "self_attn.v_proj", "avg_loss": 133.80022321428572, "time": 1.0575401782989502}, {"layer": 44, "module": "self_attn.q_proj", "avg_loss": 452.4096912202381, "time": 1.063589096069336}, {"layer": 44, "module": "self_attn.o_proj", "avg_loss": 24.305259099082342, "time": 1.0778491497039795}, {"layer": 44, "module": "mlp.up_proj", "avg_loss": 1038.9070560515872, "time": 1.1586480140686035}, {"layer": 44, "module": "mlp.gate_proj", "avg_loss": 916.0292658730159, "time": 1.1623876094818115}, {"layer": 44, "module": "mlp.down_proj", "avg_loss": 368.943359375, "time": 3.1816890239715576}, {"layer": 45, "module": "self_attn.k_proj", "avg_loss": 57.84093269469246, "time": 1.1365206241607666}, {"layer": 45, "module": "self_attn.v_proj", "avg_loss": 175.05764818948413, "time": 1.1095149517059326}, {"layer": 45, "module": "self_attn.q_proj", "avg_loss": 450.286613343254, "time": 1.0905654430389404}, {"layer": 45, "module": "self_attn.o_proj", "avg_loss": 39.24590773809524, "time": 1.0676183700561523}, {"layer": 45, "module": "mlp.up_proj", "avg_loss": 1160.063988095238, "time": 1.1477103233337402}, {"layer": 45, "module": "mlp.gate_proj", "avg_loss": 1016.5353422619048, "time": 1.200683832168579}, {"layer": 45, "module": "mlp.down_proj", "avg_loss": 401.7080543154762, "time": 3.078281879425049}, {"layer": 46, "module": "self_attn.k_proj", "avg_loss": 55.581666976686506, "time": 1.1098747253417969}, {"layer": 46, "module": "self_attn.v_proj", "avg_loss": 133.79853360615078, "time": 1.0754835605621338}, {"layer": 46, "module": "self_attn.q_proj", "avg_loss": 399.1354476686508, "time": 1.115839958190918}, {"layer": 46, "module": "self_attn.o_proj", "avg_loss": 24.472615559895832, "time": 1.131922721862793}, {"layer": 46, "module": "mlp.up_proj", "avg_loss": 1294.6615823412699, "time": 1.1163275241851807}, {"layer": 46, "module": "mlp.gate_proj", "avg_loss": 1124.0228174603174, "time": 1.1561198234558105}, {"layer": 46, "module": "mlp.down_proj", "avg_loss": 470.13479662698415, "time": 2.9696736335754395}, {"layer": 47, "module": "self_attn.k_proj", "avg_loss": 53.34169611855159, "time": 1.0764660835266113}, {"layer": 47, "module": "self_attn.v_proj", "avg_loss": 124.41122581845238, "time": 1.079728603363037}, {"layer": 47, "module": "self_attn.q_proj", "avg_loss": 413.22454737103175, "time": 1.07657790184021}, {"layer": 47, "module": "self_attn.o_proj", "avg_loss": 81.59491257440476, "time": 1.07061767578125}, {"layer": 47, "module": "mlp.up_proj", "avg_loss": 1356.585689484127, "time": 1.1052932739257812}, {"layer": 47, "module": "mlp.gate_proj", "avg_loss": 1179.6619543650793, "time": 1.1666970252990723}, {"layer": 47, "module": "mlp.down_proj", "avg_loss": 616.523685515873, "time": 3.096477746963501}, {"layer": 48, "module": "self_attn.k_proj", "avg_loss": 48.05840773809524, "time": 1.1547579765319824}, {"layer": 48, "module": "self_attn.v_proj", "avg_loss": 99.7431640625, "time": 1.1208021640777588}, {"layer": 48, "module": "self_attn.q_proj", "avg_loss": 368.58655753968253, "time": 1.1236627101898193}, {"layer": 48, "module": "self_attn.o_proj", "avg_loss": 64.45728701636905, "time": 1.0869407653808594}, {"layer": 48, "module": "mlp.up_proj", "avg_loss": 1434.7521081349207, "time": 1.101623773574829}, {"layer": 48, "module": "mlp.gate_proj", "avg_loss": 1293.7516121031747, "time": 1.1288175582885742}, {"layer": 48, "module": "mlp.down_proj", "avg_loss": 1785.866691468254, "time": 2.9995808601379395}]