Yi-1.5-34B-Chat-4bit-gptq / quant_log.json
LRL
Init Model
64b0cb4
[{"layer": 1, "module": "self_attn.k_proj", "avg_loss": 5.352675180586558, "time": 2.5457825660705566}, {"layer": 1, "module": "self_attn.v_proj", "avg_loss": 0.13389753916906932, "time": 2.2157156467437744}, {"layer": 1, "module": "self_attn.q_proj", "avg_loss": 4.476159474206349, "time": 2.399394989013672}, {"layer": 1, "module": "self_attn.o_proj", "avg_loss": 0.0031054668010227263, "time": 2.159714937210083}, {"layer": 1, "module": "mlp.up_proj", "avg_loss": 0.6244259788876488, "time": 2.1207478046417236}, {"layer": 1, "module": "mlp.gate_proj", "avg_loss": 0.6364760625930059, "time": 2.1242661476135254}, {"layer": 1, "module": "mlp.down_proj", "avg_loss": 0.0017594155811128161, "time": 8.541720867156982}, {"layer": 2, "module": "self_attn.k_proj", "avg_loss": 0.8723975893050905, "time": 3.272671937942505}, {"layer": 2, "module": "self_attn.v_proj", "avg_loss": 0.030945327546861436, "time": 2.389962911605835}, {"layer": 2, "module": "self_attn.q_proj", "avg_loss": 1.3866521441747273, "time": 2.0651729106903076}, {"layer": 2, "module": "self_attn.o_proj", "avg_loss": 0.00036874540623218295, "time": 3.0575196743011475}, {"layer": 2, "module": "mlp.up_proj", "avg_loss": 1.0557290940057664, "time": 2.6198625564575195}, {"layer": 2, "module": "mlp.gate_proj", "avg_loss": 1.0934615362258184, "time": 2.961115598678589}, {"layer": 2, "module": "mlp.down_proj", "avg_loss": 0.013057500597030397, "time": 7.506185531616211}, {"layer": 3, "module": "self_attn.k_proj", "avg_loss": 0.489898439437624, "time": 3.183738946914673}, {"layer": 3, "module": "self_attn.v_proj", "avg_loss": 0.08028268057202535, "time": 1.9978737831115723}, {"layer": 3, "module": "self_attn.q_proj", "avg_loss": 1.0803394620380704, "time": 2.816321611404419}, {"layer": 3, "module": "self_attn.o_proj", "avg_loss": 0.00876480435568189, "time": 3.149240016937256}, {"layer": 3, "module": "mlp.up_proj", "avg_loss": 2.3247491745721724, "time": 2.867182493209839}, {"layer": 3, "module": "mlp.gate_proj", "avg_loss": 2.3451891702318948, "time": 2.4137649536132812}, {"layer": 3, "module": "mlp.down_proj", "avg_loss": 7.370079403831845, "time": 9.667740106582642}, {"layer": 4, "module": "self_attn.k_proj", "avg_loss": 2.0101394653320312, "time": 2.0382983684539795}, {"layer": 4, "module": "self_attn.v_proj", "avg_loss": 0.4787936740451389, "time": 1.9998931884765625}, {"layer": 4, "module": "self_attn.q_proj", "avg_loss": 5.650406125992063, "time": 2.0842807292938232}, {"layer": 4, "module": "self_attn.o_proj", "avg_loss": 0.08738137805272662, "time": 2.1177215576171875}, {"layer": 4, "module": "mlp.up_proj", "avg_loss": 6.518628801618304, "time": 3.188063144683838}, {"layer": 4, "module": "mlp.gate_proj", "avg_loss": 6.464323013547867, "time": 3.1364805698394775}, {"layer": 4, "module": "mlp.down_proj", "avg_loss": 0.1583853222074963, "time": 6.951603412628174}, {"layer": 5, "module": "self_attn.k_proj", "avg_loss": 2.1578158424014138, "time": 1.9238872528076172}, {"layer": 5, "module": "self_attn.v_proj", "avg_loss": 0.9234596131339906, "time": 1.8428776264190674}, {"layer": 5, "module": "self_attn.q_proj", "avg_loss": 5.730054098462301, "time": 1.9038681983947754}, {"layer": 5, "module": "self_attn.o_proj", "avg_loss": 0.022232861745925175, "time": 1.9657864570617676}, {"layer": 5, "module": "mlp.up_proj", "avg_loss": 14.477413116939484, "time": 2.2568864822387695}, {"layer": 5, "module": "mlp.gate_proj", "avg_loss": 14.661653064546131, "time": 2.3405675888061523}, {"layer": 5, "module": "mlp.down_proj", "avg_loss": 0.25524254450722345, "time": 7.110980272293091}, {"layer": 6, "module": "self_attn.k_proj", "avg_loss": 2.620084732297867, "time": 2.2834157943725586}, {"layer": 6, "module": "self_attn.v_proj", "avg_loss": 1.0401101490807911, "time": 2.170969247817993}, {"layer": 6, "module": "self_attn.q_proj", "avg_loss": 7.287877642919147, "time": 2.169440984725952}, {"layer": 6, "module": "self_attn.o_proj", "avg_loss": 0.09028500602358863, "time": 1.9799864292144775}, {"layer": 6, "module": "mlp.up_proj", "avg_loss": 20.637532552083332, "time": 2.1252171993255615}, {"layer": 6, "module": "mlp.gate_proj", "avg_loss": 21.073753720238095, "time": 2.1808342933654785}, {"layer": 6, "module": "mlp.down_proj", "avg_loss": 0.35704509795658174, "time": 6.94711446762085}, {"layer": 7, "module": "self_attn.k_proj", "avg_loss": 8.05464099702381, "time": 2.09403133392334}, {"layer": 7, "module": "self_attn.v_proj", "avg_loss": 2.40505860343812, "time": 2.1769185066223145}, {"layer": 7, "module": "self_attn.q_proj", "avg_loss": 23.639388795882937, "time": 1.9873888492584229}, {"layer": 7, "module": "self_attn.o_proj", "avg_loss": 0.26360648018973215, "time": 2.0446383953094482}, {"layer": 7, "module": "mlp.up_proj", "avg_loss": 31.92754448784722, "time": 2.075176477432251}, {"layer": 7, "module": "mlp.gate_proj", "avg_loss": 32.71322389632937, "time": 1.996262550354004}, {"layer": 7, "module": "mlp.down_proj", "avg_loss": 0.6238175649491567, "time": 6.546977996826172}, {"layer": 8, "module": "self_attn.k_proj", "avg_loss": 9.314669170076884, "time": 2.0263328552246094}, {"layer": 8, "module": "self_attn.v_proj", "avg_loss": 2.5825345478360613, "time": 1.9527411460876465}, {"layer": 8, "module": "self_attn.q_proj", "avg_loss": 28.955233134920636, "time": 2.3074488639831543}, {"layer": 8, "module": "self_attn.o_proj", "avg_loss": 0.3693157074943421, "time": 2.1339774131774902}, {"layer": 8, "module": "mlp.up_proj", "avg_loss": 47.792945498511905, "time": 2.1569526195526123}, {"layer": 8, "module": "mlp.gate_proj", "avg_loss": 48.99856615823413, "time": 2.1533212661743164}, {"layer": 8, "module": "mlp.down_proj", "avg_loss": 1.0512735275995164, "time": 6.662954807281494}, {"layer": 9, "module": "self_attn.k_proj", "avg_loss": 8.495572529141866, "time": 2.199244976043701}, {"layer": 9, "module": "self_attn.v_proj", "avg_loss": 3.404289366706969, "time": 2.710068464279175}, {"layer": 9, "module": "self_attn.q_proj", "avg_loss": 25.87343439980159, "time": 2.7377583980560303}, {"layer": 9, "module": "self_attn.o_proj", "avg_loss": 0.16249947320847286, "time": 2.1193313598632812}, {"layer": 9, "module": "mlp.up_proj", "avg_loss": 62.42733134920635, "time": 2.3371987342834473}, {"layer": 9, "module": "mlp.gate_proj", "avg_loss": 64.26760912698413, "time": 2.196030855178833}, {"layer": 9, "module": "mlp.down_proj", "avg_loss": 1.3513081868489583, "time": 6.868269681930542}, {"layer": 10, "module": "self_attn.k_proj", "avg_loss": 15.778831845238095, "time": 2.1208229064941406}, {"layer": 10, "module": "self_attn.v_proj", "avg_loss": 5.327614920479911, "time": 2.1446030139923096}, {"layer": 10, "module": "self_attn.q_proj", "avg_loss": 53.795038132440474, "time": 2.3207740783691406}, {"layer": 10, "module": "self_attn.o_proj", "avg_loss": 0.3753324236188616, "time": 2.1635751724243164}, {"layer": 10, "module": "mlp.up_proj", "avg_loss": 79.3354724702381, "time": 2.297067403793335}, {"layer": 10, "module": "mlp.gate_proj", "avg_loss": 82.1301773313492, "time": 2.090648651123047}, {"layer": 10, "module": "mlp.down_proj", "avg_loss": 1.9281218998015872, "time": 6.636880874633789}, {"layer": 11, "module": "self_attn.k_proj", "avg_loss": 18.554722377232142, "time": 2.056863307952881}, {"layer": 11, "module": "self_attn.v_proj", "avg_loss": 6.074541364397321, "time": 2.0503828525543213}, {"layer": 11, "module": "self_attn.q_proj", "avg_loss": 65.51035466269842, "time": 2.0760035514831543}, {"layer": 11, "module": "self_attn.o_proj", "avg_loss": 0.4575565883091518, "time": 2.2344119548797607}, {"layer": 11, "module": "mlp.up_proj", "avg_loss": 86.83082992311508, "time": 2.182546854019165}, {"layer": 11, "module": "mlp.gate_proj", "avg_loss": 89.6674572172619, "time": 2.3126561641693115}, {"layer": 11, "module": "mlp.down_proj", "avg_loss": 2.343931652250744, "time": 6.817188024520874}, {"layer": 12, "module": "self_attn.k_proj", "avg_loss": 34.126875620039684, "time": 2.118962049484253}, {"layer": 12, "module": "self_attn.v_proj", "avg_loss": 10.643899584573413, "time": 1.999316692352295}, {"layer": 12, "module": "self_attn.q_proj", "avg_loss": 131.94844370039684, "time": 2.1530845165252686}, {"layer": 12, "module": "self_attn.o_proj", "avg_loss": 0.984809814937531, "time": 2.1864984035491943}, {"layer": 12, "module": "mlp.up_proj", "avg_loss": 113.31896391369048, "time": 2.2849512100219727}, {"layer": 12, "module": "mlp.gate_proj", "avg_loss": 116.94056144593254, "time": 2.235769033432007}, {"layer": 12, "module": "mlp.down_proj", "avg_loss": 3.3877645825582836, "time": 7.217832803726196}, {"layer": 13, "module": "self_attn.k_proj", "avg_loss": 35.572432260664684, "time": 2.415557861328125}, {"layer": 13, "module": "self_attn.v_proj", "avg_loss": 12.184922960069445, "time": 2.050490617752075}, {"layer": 13, "module": "self_attn.q_proj", "avg_loss": 142.60072544642858, "time": 2.4656178951263428}, {"layer": 13, "module": "self_attn.o_proj", "avg_loss": 1.0269754803369915, "time": 2.207429885864258}, {"layer": 13, "module": "mlp.up_proj", "avg_loss": 142.36318824404762, "time": 2.2090272903442383}, {"layer": 13, "module": "mlp.gate_proj", "avg_loss": 147.04489087301587, "time": 2.7891242504119873}, {"layer": 13, "module": "mlp.down_proj", "avg_loss": 4.629325261191716, "time": 6.986166477203369}, {"layer": 14, "module": "self_attn.k_proj", "avg_loss": 31.207236638144842, "time": 2.1986918449401855}, {"layer": 14, "module": "self_attn.v_proj", "avg_loss": 13.28580341641865, "time": 2.3619863986968994}, {"layer": 14, "module": "self_attn.q_proj", "avg_loss": 124.32797774057539, "time": 2.0578954219818115}, {"layer": 14, "module": "self_attn.o_proj", "avg_loss": 0.7353095402793278, "time": 2.549999475479126}, {"layer": 14, "module": "mlp.up_proj", "avg_loss": 169.75354972718253, "time": 2.4990320205688477}, {"layer": 14, "module": "mlp.gate_proj", "avg_loss": 176.46716889880952, "time": 3.6217963695526123}, {"layer": 14, "module": "mlp.down_proj", "avg_loss": 5.519689166356647, "time": 7.642864942550659}, {"layer": 15, "module": "self_attn.k_proj", "avg_loss": 43.763400607638886, "time": 2.3508238792419434}, {"layer": 15, "module": "self_attn.v_proj", "avg_loss": 16.08542984250992, "time": 2.1609179973602295}, {"layer": 15, "module": "self_attn.q_proj", "avg_loss": 175.5633060515873, "time": 2.13018536567688}, {"layer": 15, "module": "self_attn.o_proj", "avg_loss": 0.937774416000124, "time": 2.8777458667755127}, {"layer": 15, "module": "mlp.up_proj", "avg_loss": 201.35540674603175, "time": 2.5363967418670654}, {"layer": 15, "module": "mlp.gate_proj", "avg_loss": 210.3000682043651, "time": 2.391981840133667}, {"layer": 15, "module": "mlp.down_proj", "avg_loss": 6.780672588045635, "time": 8.220483541488647}, {"layer": 16, "module": "self_attn.k_proj", "avg_loss": 37.83543371775794, "time": 2.8494999408721924}, {"layer": 16, "module": "self_attn.v_proj", "avg_loss": 14.965320405505953, "time": 2.7592599391937256}, {"layer": 16, "module": "self_attn.q_proj", "avg_loss": 147.0636470734127, "time": 3.1039795875549316}, {"layer": 16, "module": "self_attn.o_proj", "avg_loss": 1.1516116914295016, "time": 2.678079128265381}, {"layer": 16, "module": "mlp.up_proj", "avg_loss": 229.59037078373015, "time": 2.687710762023926}, {"layer": 16, "module": "mlp.gate_proj", "avg_loss": 240.8960193452381, "time": 2.8952298164367676}, {"layer": 16, "module": "mlp.down_proj", "avg_loss": 8.092887757316468, "time": 8.542986154556274}, {"layer": 17, "module": "self_attn.k_proj", "avg_loss": 50.97136966765873, "time": 2.368778705596924}, {"layer": 17, "module": "self_attn.v_proj", "avg_loss": 16.995715913318453, "time": 2.1888599395751953}, {"layer": 17, "module": "self_attn.q_proj", "avg_loss": 201.63223896329364, "time": 2.509805202484131}, {"layer": 17, "module": "self_attn.o_proj", "avg_loss": 1.443602546812996, "time": 2.4869816303253174}, {"layer": 17, "module": "mlp.up_proj", "avg_loss": 244.1637214781746, "time": 2.676164388656616}, {"layer": 17, "module": "mlp.gate_proj", "avg_loss": 262.6971726190476, "time": 3.0328330993652344}, {"layer": 17, "module": "mlp.down_proj", "avg_loss": 8.970226469494047, "time": 7.8814685344696045}, {"layer": 18, "module": "self_attn.k_proj", "avg_loss": 48.53113761780754, "time": 2.246159553527832}, {"layer": 18, "module": "self_attn.v_proj", "avg_loss": 20.305416046626984, "time": 2.337399959564209}, {"layer": 18, "module": "self_attn.q_proj", "avg_loss": 215.19180927579364, "time": 2.3759331703186035}, {"layer": 18, "module": "self_attn.o_proj", "avg_loss": 1.8834865509517609, "time": 2.3334567546844482}, {"layer": 18, "module": "mlp.up_proj", "avg_loss": 272.4866691468254, "time": 3.319779634475708}, {"layer": 18, "module": "mlp.gate_proj", "avg_loss": 290.0613219246032, "time": 2.993605136871338}, {"layer": 18, "module": "mlp.down_proj", "avg_loss": 10.608975849454366, "time": 6.99455189704895}, {"layer": 19, "module": "self_attn.k_proj", "avg_loss": 52.32152932787698, "time": 2.1018946170806885}, {"layer": 19, "module": "self_attn.v_proj", "avg_loss": 21.76708984375, "time": 2.255772590637207}, {"layer": 19, "module": "self_attn.q_proj", "avg_loss": 226.58006262400792, "time": 2.3911139965057373}, {"layer": 19, "module": "self_attn.o_proj", "avg_loss": 2.1412256634424605, "time": 2.1422195434570312}, {"layer": 19, "module": "mlp.up_proj", "avg_loss": 328.1527157738095, "time": 2.3690097332000732}, {"layer": 19, "module": "mlp.gate_proj", "avg_loss": 352.2989211309524, "time": 2.3150594234466553}, {"layer": 19, "module": "mlp.down_proj", "avg_loss": 14.232119605654763, "time": 6.933509111404419}, {"layer": 20, "module": "self_attn.k_proj", "avg_loss": 55.77332899305556, "time": 2.3585855960845947}, {"layer": 20, "module": "self_attn.v_proj", "avg_loss": 29.46698676215278, "time": 2.3735766410827637}, {"layer": 20, "module": "self_attn.q_proj", "avg_loss": 266.4409412202381, "time": 1.9702329635620117}, {"layer": 20, "module": "self_attn.o_proj", "avg_loss": 2.175323970734127, "time": 2.146240711212158}, {"layer": 20, "module": "mlp.up_proj", "avg_loss": 385.65916418650795, "time": 2.2081491947174072}, {"layer": 20, "module": "mlp.gate_proj", "avg_loss": 415.8230406746032, "time": 2.4151508808135986}, {"layer": 20, "module": "mlp.down_proj", "avg_loss": 19.91178966703869, "time": 6.677979946136475}, {"layer": 21, "module": "self_attn.k_proj", "avg_loss": 80.23887028769842, "time": 2.391807794570923}, {"layer": 21, "module": "self_attn.v_proj", "avg_loss": 33.97061011904762, "time": 2.38171124458313}, {"layer": 21, "module": "self_attn.q_proj", "avg_loss": 382.9792906746032, "time": 2.1479828357696533}, {"layer": 21, "module": "self_attn.o_proj", "avg_loss": 3.514804900638641, "time": 2.317312002182007}, {"layer": 21, "module": "mlp.up_proj", "avg_loss": 453.4776165674603, "time": 2.1369853019714355}, {"layer": 21, "module": "mlp.gate_proj", "avg_loss": 506.9058779761905, "time": 2.1972460746765137}, {"layer": 21, "module": "mlp.down_proj", "avg_loss": 32.4183349609375, "time": 6.615049123764038}, {"layer": 22, "module": "self_attn.k_proj", "avg_loss": 75.90335131448413, "time": 2.4599740505218506}, {"layer": 22, "module": "self_attn.v_proj", "avg_loss": 27.465587797619047, "time": 1.9603257179260254}, {"layer": 22, "module": "self_attn.q_proj", "avg_loss": 336.49488467261904, "time": 2.2569823265075684}, {"layer": 22, "module": "self_attn.o_proj", "avg_loss": 5.145970904637897, "time": 2.0365090370178223}, {"layer": 22, "module": "mlp.up_proj", "avg_loss": 481.05288938492066, "time": 2.2826924324035645}, {"layer": 22, "module": "mlp.gate_proj", "avg_loss": 522.5994543650794, "time": 2.2367238998413086}, {"layer": 22, "module": "mlp.down_proj", "avg_loss": 31.386036706349206, "time": 7.027307033538818}, {"layer": 23, "module": "self_attn.k_proj", "avg_loss": 73.13326202876983, "time": 2.110029935836792}, {"layer": 23, "module": "self_attn.v_proj", "avg_loss": 25.576842292906747, "time": 2.440505027770996}, {"layer": 23, "module": "self_attn.q_proj", "avg_loss": 320.3154761904762, "time": 2.2368032932281494}, {"layer": 23, "module": "self_attn.o_proj", "avg_loss": 6.807298448350695, "time": 2.14664626121521}, {"layer": 23, "module": "mlp.up_proj", "avg_loss": 539.8999255952381, "time": 2.1670420169830322}, {"layer": 23, "module": "mlp.gate_proj", "avg_loss": 592.8683035714286, "time": 2.3095970153808594}, {"layer": 23, "module": "mlp.down_proj", "avg_loss": 43.73617699032738, "time": 6.789109230041504}, {"layer": 24, "module": "self_attn.k_proj", "avg_loss": 76.84023127480158, "time": 2.1145384311676025}, {"layer": 24, "module": "self_attn.v_proj", "avg_loss": 23.274379185267858, "time": 2.095081329345703}, {"layer": 24, "module": "self_attn.q_proj", "avg_loss": 309.6246279761905, "time": 2.0926408767700195}, {"layer": 24, "module": "self_attn.o_proj", "avg_loss": 7.387056865389385, "time": 2.05888032913208}, {"layer": 24, "module": "mlp.up_proj", "avg_loss": 593.0297619047619, "time": 2.1627912521362305}, {"layer": 24, "module": "mlp.gate_proj", "avg_loss": 618.1878720238095, "time": 2.2899839878082275}, {"layer": 24, "module": "mlp.down_proj", "avg_loss": 46.07567196800595, "time": 7.014447212219238}, {"layer": 25, "module": "self_attn.k_proj", "avg_loss": 91.97733754960318, "time": 2.0840697288513184}, {"layer": 25, "module": "self_attn.v_proj", "avg_loss": 32.36742195250496, "time": 2.1736598014831543}, {"layer": 25, "module": "self_attn.q_proj", "avg_loss": 438.42950148809524, "time": 2.0889174938201904}, {"layer": 25, "module": "self_attn.o_proj", "avg_loss": 9.07851833767361, "time": 2.320512056350708}, {"layer": 25, "module": "mlp.up_proj", "avg_loss": 660.5408606150794, "time": 2.345473527908325}, {"layer": 25, "module": "mlp.gate_proj", "avg_loss": 675.717757936508, "time": 2.0184593200683594}, {"layer": 25, "module": "mlp.down_proj", "avg_loss": 54.07588898189484, "time": 6.973055839538574}, {"layer": 26, "module": "self_attn.k_proj", "avg_loss": 101.75777374751983, "time": 2.126244068145752}, {"layer": 26, "module": "self_attn.v_proj", "avg_loss": 32.61105298239087, "time": 2.1666316986083984}, {"layer": 26, "module": "self_attn.q_proj", "avg_loss": 447.2990451388889, "time": 2.2038824558258057}, {"layer": 26, "module": "self_attn.o_proj", "avg_loss": 14.364691840277779, "time": 2.422959804534912}, {"layer": 26, "module": "mlp.up_proj", "avg_loss": 704.3473462301587, "time": 2.372905969619751}, {"layer": 26, "module": "mlp.gate_proj", "avg_loss": 708.7333209325396, "time": 2.2989752292633057}, {"layer": 26, "module": "mlp.down_proj", "avg_loss": 64.9376705109127, "time": 7.0325446128845215}, {"layer": 27, "module": "self_attn.k_proj", "avg_loss": 91.77731274801587, "time": 2.103760242462158}, {"layer": 27, "module": "self_attn.v_proj", "avg_loss": 30.74524119543651, "time": 2.08034348487854}, {"layer": 27, "module": "self_attn.q_proj", "avg_loss": 413.844246031746, "time": 2.0658745765686035}, {"layer": 27, "module": "self_attn.o_proj", "avg_loss": 19.207504030257937, "time": 2.022977590560913}, {"layer": 27, "module": "mlp.up_proj", "avg_loss": 731.502914186508, "time": 2.425034761428833}, {"layer": 27, "module": "mlp.gate_proj", "avg_loss": 709.3297371031746, "time": 2.280036211013794}, {"layer": 27, "module": "mlp.down_proj", "avg_loss": 72.93081907242063, "time": 6.964323043823242}, {"layer": 28, "module": "self_attn.k_proj", "avg_loss": 114.45960441468254, "time": 2.1853702068328857}, {"layer": 28, "module": "self_attn.v_proj", "avg_loss": 34.186449807787696, "time": 2.2038164138793945}, {"layer": 28, "module": "self_attn.q_proj", "avg_loss": 532.7682911706349, "time": 2.2853848934173584}, {"layer": 28, "module": "self_attn.o_proj", "avg_loss": 19.498291015625, "time": 2.4536030292510986}, {"layer": 28, "module": "mlp.up_proj", "avg_loss": 826.8420758928571, "time": 2.4311368465423584}, {"layer": 28, "module": "mlp.gate_proj", "avg_loss": 795.5232514880952, "time": 2.543236017227173}, {"layer": 28, "module": "mlp.down_proj", "avg_loss": 85.23521980406746, "time": 6.733373403549194}, {"layer": 29, "module": "self_attn.k_proj", "avg_loss": 113.4267810639881, "time": 2.1889612674713135}, {"layer": 29, "module": "self_attn.v_proj", "avg_loss": 43.11337038070437, "time": 2.1915242671966553}, {"layer": 29, "module": "self_attn.q_proj", "avg_loss": 566.0379464285714, "time": 2.216681718826294}, {"layer": 29, "module": "self_attn.o_proj", "avg_loss": 24.19460139198909, "time": 2.2784922122955322}, {"layer": 29, "module": "mlp.up_proj", "avg_loss": 898.7199900793651, "time": 2.249082565307617}, {"layer": 29, "module": "mlp.gate_proj", "avg_loss": 852.2542162698413, "time": 2.2721869945526123}, {"layer": 29, "module": "mlp.down_proj", "avg_loss": 100.14761594742063, "time": 6.619414567947388}, {"layer": 30, "module": "self_attn.k_proj", "avg_loss": 126.55442398313492, "time": 2.077782392501831}, {"layer": 30, "module": "self_attn.v_proj", "avg_loss": 57.483375186011905, "time": 2.0346384048461914}, {"layer": 30, "module": "self_attn.q_proj", "avg_loss": 660.8371775793651, "time": 2.0995516777038574}, {"layer": 30, "module": "self_attn.o_proj", "avg_loss": 28.093331473214285, "time": 2.147719383239746}, {"layer": 30, "module": "mlp.up_proj", "avg_loss": 944.1930803571429, "time": 2.231616497039795}, {"layer": 30, "module": "mlp.gate_proj", "avg_loss": 871.8255208333334, "time": 2.1849615573883057}, {"layer": 30, "module": "mlp.down_proj", "avg_loss": 114.84353298611111, "time": 6.536309242248535}, {"layer": 31, "module": "self_attn.k_proj", "avg_loss": 108.03368365575396, "time": 2.6649467945098877}, {"layer": 31, "module": "self_attn.v_proj", "avg_loss": 53.35635230654762, "time": 2.217255115509033}, {"layer": 31, "module": "self_attn.q_proj", "avg_loss": 552.1469494047619, "time": 2.370621919631958}, {"layer": 31, "module": "self_attn.o_proj", "avg_loss": 32.97549680679563, "time": 2.3023993968963623}, {"layer": 31, "module": "mlp.up_proj", "avg_loss": 1003.2869543650794, "time": 2.2799007892608643}, {"layer": 31, "module": "mlp.gate_proj", "avg_loss": 926.3782242063492, "time": 2.5045313835144043}, {"layer": 31, "module": "mlp.down_proj", "avg_loss": 125.35125248015873, "time": 7.961044549942017}, {"layer": 32, "module": "self_attn.k_proj", "avg_loss": 115.18201264880952, "time": 2.426957368850708}, {"layer": 32, "module": "self_attn.v_proj", "avg_loss": 61.137381417410715, "time": 2.5159573554992676}, {"layer": 32, "module": "self_attn.q_proj", "avg_loss": 599.6400669642857, "time": 2.381300210952759}, {"layer": 32, "module": "self_attn.o_proj", "avg_loss": 40.22874813988095, "time": 2.634478807449341}, {"layer": 32, "module": "mlp.up_proj", "avg_loss": 1044.0261656746031, "time": 2.7347238063812256}, {"layer": 32, "module": "mlp.gate_proj", "avg_loss": 957.6473834325396, "time": 2.938586473464966}, {"layer": 32, "module": "mlp.down_proj", "avg_loss": 145.26103670634922, "time": 7.490089178085327}, {"layer": 33, "module": "self_attn.k_proj", "avg_loss": 132.7781963045635, "time": 2.1406209468841553}, {"layer": 33, "module": "self_attn.v_proj", "avg_loss": 69.52664620535714, "time": 2.127328395843506}, {"layer": 33, "module": "self_attn.q_proj", "avg_loss": 720.2191220238095, "time": 2.266737222671509}, {"layer": 33, "module": "self_attn.o_proj", "avg_loss": 45.76482669890873, "time": 2.109088897705078}, {"layer": 33, "module": "mlp.up_proj", "avg_loss": 1154.3797123015872, "time": 2.343935251235962}, {"layer": 33, "module": "mlp.gate_proj", "avg_loss": 1050.7652529761904, "time": 2.4407670497894287}, {"layer": 33, "module": "mlp.down_proj", "avg_loss": 169.39468625992063, "time": 7.000013828277588}, {"layer": 34, "module": "self_attn.k_proj", "avg_loss": 120.54341827876983, "time": 2.1037535667419434}, {"layer": 34, "module": "self_attn.v_proj", "avg_loss": 80.45408606150794, "time": 2.126620292663574}, {"layer": 34, "module": "self_attn.q_proj", "avg_loss": 678.4817708333334, "time": 2.431802272796631}, {"layer": 34, "module": "self_attn.o_proj", "avg_loss": 56.026731460813494, "time": 1.9837536811828613}, {"layer": 34, "module": "mlp.up_proj", "avg_loss": 1254.1155753968253, "time": 2.356731414794922}, {"layer": 34, "module": "mlp.gate_proj", "avg_loss": 1128.4883432539682, "time": 2.449679374694824}, {"layer": 34, "module": "mlp.down_proj", "avg_loss": 200.347408234127, "time": 6.9652323722839355}, {"layer": 35, "module": "self_attn.k_proj", "avg_loss": 121.49111793154762, "time": 2.2837982177734375}, {"layer": 35, "module": "self_attn.v_proj", "avg_loss": 93.78024243551587, "time": 2.5460503101348877}, {"layer": 35, "module": "self_attn.q_proj", "avg_loss": 718.2283606150794, "time": 2.1673877239227295}, {"layer": 35, "module": "self_attn.o_proj", "avg_loss": 62.98656839037698, "time": 2.1774699687957764}, {"layer": 35, "module": "mlp.up_proj", "avg_loss": 1256.5044642857142, "time": 2.397977828979492}, {"layer": 35, "module": "mlp.gate_proj", "avg_loss": 1121.9696180555557, "time": 2.2232072353363037}, {"layer": 35, "module": "mlp.down_proj", "avg_loss": 222.02959139384922, "time": 6.944913625717163}, {"layer": 36, "module": "self_attn.k_proj", "avg_loss": 126.06265500992063, "time": 2.3256494998931885}, {"layer": 36, "module": "self_attn.v_proj", "avg_loss": 87.74272228422619, "time": 2.3169636726379395}, {"layer": 36, "module": "self_attn.q_proj", "avg_loss": 718.4513888888889, "time": 2.324816942214966}, {"layer": 36, "module": "self_attn.o_proj", "avg_loss": 73.57989986359127, "time": 2.484311819076538}, {"layer": 36, "module": "mlp.up_proj", "avg_loss": 1334.4415922619048, "time": 2.213104248046875}, {"layer": 36, "module": "mlp.gate_proj", "avg_loss": 1191.7889384920634, "time": 2.493518352508545}, {"layer": 36, "module": "mlp.down_proj", "avg_loss": 275.88941592261904, "time": 7.860870599746704}, {"layer": 37, "module": "self_attn.k_proj", "avg_loss": 127.92144097222223, "time": 2.2784276008605957}, {"layer": 37, "module": "self_attn.v_proj", "avg_loss": 116.53499348958333, "time": 2.631031036376953}, {"layer": 37, "module": "self_attn.q_proj", "avg_loss": 810.6729290674604, "time": 2.204740047454834}, {"layer": 37, "module": "self_attn.o_proj", "avg_loss": 63.32557896205357, "time": 2.1935198307037354}, {"layer": 37, "module": "mlp.up_proj", "avg_loss": 1524.1112351190477, "time": 2.927372694015503}, {"layer": 37, "module": "mlp.gate_proj", "avg_loss": 1347.859623015873, "time": 2.3059942722320557}, {"layer": 37, "module": "mlp.down_proj", "avg_loss": 342.19723462301585, "time": 7.050895690917969}, {"layer": 38, "module": "self_attn.k_proj", "avg_loss": 116.72348555307539, "time": 2.261777877807617}, {"layer": 38, "module": "self_attn.v_proj", "avg_loss": 117.39208984375, "time": 2.815408945083618}, {"layer": 38, "module": "self_attn.q_proj", "avg_loss": 734.2388392857143, "time": 2.3687357902526855}, {"layer": 38, "module": "self_attn.o_proj", "avg_loss": 100.2750961061508, "time": 2.4433066844940186}, {"layer": 38, "module": "mlp.up_proj", "avg_loss": 1695.0897817460318, "time": 2.4754209518432617}, {"layer": 38, "module": "mlp.gate_proj", "avg_loss": 1501.626240079365, "time": 2.731534481048584}, {"layer": 38, "module": "mlp.down_proj", "avg_loss": 447.2271205357143, "time": 7.6386191844940186}, {"layer": 39, "module": "self_attn.k_proj", "avg_loss": 126.81036861359127, "time": 2.5480144023895264}, {"layer": 39, "module": "self_attn.v_proj", "avg_loss": 138.64192708333334, "time": 2.3330159187316895}, {"layer": 39, "module": "self_attn.q_proj", "avg_loss": 789.4280133928571, "time": 2.3484737873077393}, {"layer": 39, "module": "self_attn.o_proj", "avg_loss": 99.2939685639881, "time": 2.291820764541626}, {"layer": 39, "module": "mlp.up_proj", "avg_loss": 1915.2368551587301, "time": 2.5068767070770264}, {"layer": 39, "module": "mlp.gate_proj", "avg_loss": 1709.9332837301588, "time": 2.7119457721710205}, {"layer": 39, "module": "mlp.down_proj", "avg_loss": 521.0388764880952, "time": 7.344953298568726}, {"layer": 40, "module": "self_attn.k_proj", "avg_loss": 124.90468439980158, "time": 2.1272478103637695}, {"layer": 40, "module": "self_attn.v_proj", "avg_loss": 130.49514818948413, "time": 2.4202959537506104}, {"layer": 40, "module": "self_attn.q_proj", "avg_loss": 772.0674603174604, "time": 2.311368703842163}, {"layer": 40, "module": "self_attn.o_proj", "avg_loss": 92.47112165178571, "time": 2.247061014175415}, {"layer": 40, "module": "mlp.up_proj", "avg_loss": 2055.5394345238096, "time": 2.2806572914123535}, {"layer": 40, "module": "mlp.gate_proj", "avg_loss": 1843.6356646825398, "time": 2.4861927032470703}, {"layer": 40, "module": "mlp.down_proj", "avg_loss": 612.343501984127, "time": 7.836683988571167}, {"layer": 41, "module": "self_attn.k_proj", "avg_loss": 119.70989893353175, "time": 2.2113778591156006}, {"layer": 41, "module": "self_attn.v_proj", "avg_loss": 143.247798859127, "time": 2.623634099960327}, {"layer": 41, "module": "self_attn.q_proj", "avg_loss": 754.5211433531746, "time": 2.273845672607422}, {"layer": 41, "module": "self_attn.o_proj", "avg_loss": 92.20050533234127, "time": 2.7770090103149414}, {"layer": 41, "module": "mlp.up_proj", "avg_loss": 2257.6153273809523, "time": 2.8424792289733887}, {"layer": 41, "module": "mlp.gate_proj", "avg_loss": 2053.557663690476, "time": 2.5228803157806396}, {"layer": 41, "module": "mlp.down_proj", "avg_loss": 739.0953621031746, "time": 7.683025121688843}, {"layer": 42, "module": "self_attn.k_proj", "avg_loss": 125.31246124751983, "time": 2.294351816177368}, {"layer": 42, "module": "self_attn.v_proj", "avg_loss": 172.73037574404762, "time": 2.4333133697509766}, {"layer": 42, "module": "self_attn.q_proj", "avg_loss": 820.9809027777778, "time": 2.292743682861328}, {"layer": 42, "module": "self_attn.o_proj", "avg_loss": 113.22722904265873, "time": 2.6487948894500732}, {"layer": 42, "module": "mlp.up_proj", "avg_loss": 2433.6999007936506, "time": 3.0376367568969727}, {"layer": 42, "module": "mlp.gate_proj", "avg_loss": 2221.797371031746, "time": 2.593151569366455}, {"layer": 42, "module": "mlp.down_proj", "avg_loss": 893.7100694444445, "time": 7.888392448425293}, {"layer": 43, "module": "self_attn.k_proj", "avg_loss": 112.40691654265873, "time": 2.7108700275421143}, {"layer": 43, "module": "self_attn.v_proj", "avg_loss": 195.31094990079364, "time": 2.3544721603393555}, {"layer": 43, "module": "self_attn.q_proj", "avg_loss": 752.6815476190476, "time": 2.5619618892669678}, {"layer": 43, "module": "self_attn.o_proj", "avg_loss": 74.88033234126983, "time": 2.6533796787261963}, {"layer": 43, "module": "mlp.up_proj", "avg_loss": 2619.8819444444443, "time": 2.880476236343384}, {"layer": 43, "module": "mlp.gate_proj", "avg_loss": 2433.9615575396824, "time": 2.500520944595337}, {"layer": 43, "module": "mlp.down_proj", "avg_loss": 962.530753968254, "time": 8.172363758087158}, {"layer": 44, "module": "self_attn.k_proj", "avg_loss": 121.62360491071429, "time": 2.7539632320404053}, {"layer": 44, "module": "self_attn.v_proj", "avg_loss": 222.51134672619048, "time": 2.6679325103759766}, {"layer": 44, "module": "self_attn.q_proj", "avg_loss": 816.3943452380952, "time": 2.758798599243164}, {"layer": 44, "module": "self_attn.o_proj", "avg_loss": 104.31371682787699, "time": 2.8383164405822754}, {"layer": 44, "module": "mlp.up_proj", "avg_loss": 2817.3479662698414, "time": 2.326167583465576}, {"layer": 44, "module": "mlp.gate_proj", "avg_loss": 2648.2740575396824, "time": 2.4691481590270996}, {"layer": 44, "module": "mlp.down_proj", "avg_loss": 1103.3876488095239, "time": 7.661853551864624}, {"layer": 45, "module": "self_attn.k_proj", "avg_loss": 126.9798564608135, "time": 2.357455253601074}, {"layer": 45, "module": "self_attn.v_proj", "avg_loss": 229.85159350198413, "time": 2.474092960357666}, {"layer": 45, "module": "self_attn.q_proj", "avg_loss": 830.1587301587301, "time": 2.4601471424102783}, {"layer": 45, "module": "self_attn.o_proj", "avg_loss": 115.72223772321429, "time": 2.4976422786712646}, {"layer": 45, "module": "mlp.up_proj", "avg_loss": 2990.968253968254, "time": 2.4489476680755615}, {"layer": 45, "module": "mlp.gate_proj", "avg_loss": 2839.6875, "time": 2.601884365081787}, {"layer": 45, "module": "mlp.down_proj", "avg_loss": 1253.373759920635, "time": 7.3495683670043945}, {"layer": 46, "module": "self_attn.k_proj", "avg_loss": 129.97213696676587, "time": 2.1209051609039307}, {"layer": 46, "module": "self_attn.v_proj", "avg_loss": 221.04041108630952, "time": 2.316596746444702}, {"layer": 46, "module": "self_attn.q_proj", "avg_loss": 828.5912698412699, "time": 2.3503916263580322}, {"layer": 46, "module": "self_attn.o_proj", "avg_loss": 81.99181547619048, "time": 2.480151891708374}, {"layer": 46, "module": "mlp.up_proj", "avg_loss": 3124.347718253968, "time": 2.574131965637207}, {"layer": 46, "module": "mlp.gate_proj", "avg_loss": 3000.7648809523807, "time": 2.4222729206085205}, {"layer": 46, "module": "mlp.down_proj", "avg_loss": 1326.6517857142858, "time": 7.630677700042725}, {"layer": 47, "module": "self_attn.k_proj", "avg_loss": 147.10441468253967, "time": 2.508167266845703}, {"layer": 47, "module": "self_attn.v_proj", "avg_loss": 232.3642578125, "time": 2.2677371501922607}, {"layer": 47, "module": "self_attn.q_proj", "avg_loss": 914.2815600198413, "time": 2.29089093208313}, {"layer": 47, "module": "self_attn.o_proj", "avg_loss": 146.66403149801587, "time": 2.4552419185638428}, {"layer": 47, "module": "mlp.up_proj", "avg_loss": 3342.0228174603176, "time": 2.5004353523254395}, {"layer": 47, "module": "mlp.gate_proj", "avg_loss": 3211.8179563492063, "time": 2.753396987915039}, {"layer": 47, "module": "mlp.down_proj", "avg_loss": 1486.897941468254, "time": 7.6181640625}, {"layer": 48, "module": "self_attn.k_proj", "avg_loss": 153.8843781001984, "time": 2.3019278049468994}, {"layer": 48, "module": "self_attn.v_proj", "avg_loss": 260.39496527777777, "time": 2.370476245880127}, {"layer": 48, "module": "self_attn.q_proj", "avg_loss": 970.0141369047619, "time": 2.477303981781006}, {"layer": 48, "module": "self_attn.o_proj", "avg_loss": 114.16617063492063, "time": 2.398376941680908}, {"layer": 48, "module": "mlp.up_proj", "avg_loss": 3585.636160714286, "time": 2.4721927642822266}, {"layer": 48, "module": "mlp.gate_proj", "avg_loss": 3446.2564484126983, "time": 2.747601270675659}, {"layer": 48, "module": "mlp.down_proj", "avg_loss": 1613.2171378968253, "time": 7.360426664352417}, {"layer": 49, "module": "self_attn.k_proj", "avg_loss": 158.35286458333334, "time": 2.3711202144622803}, {"layer": 49, "module": "self_attn.v_proj", "avg_loss": 321.1028645833333, "time": 2.179861545562744}, {"layer": 49, "module": "self_attn.q_proj", "avg_loss": 1022.5592137896825, "time": 2.2130579948425293}, {"layer": 49, "module": "self_attn.o_proj", "avg_loss": 112.45407056051587, "time": 2.2755634784698486}, {"layer": 49, "module": "mlp.up_proj", "avg_loss": 3830.6297123015875, "time": 2.3482911586761475}, {"layer": 49, "module": "mlp.gate_proj", "avg_loss": 3675.6507936507937, "time": 2.601771593093872}, {"layer": 49, "module": "mlp.down_proj", "avg_loss": 1706.3080357142858, "time": 7.879899740219116}, {"layer": 50, "module": "self_attn.k_proj", "avg_loss": 165.44831969246033, "time": 2.3640949726104736}, {"layer": 50, "module": "self_attn.v_proj", "avg_loss": 334.19078621031747, "time": 2.149076461791992}, {"layer": 50, "module": "self_attn.q_proj", "avg_loss": 1068.608630952381, "time": 2.2399165630340576}, {"layer": 50, "module": "self_attn.o_proj", "avg_loss": 153.37052021329364, "time": 2.5492467880249023}, {"layer": 50, "module": "mlp.up_proj", "avg_loss": 4087.2281746031745, "time": 2.4953083992004395}, {"layer": 50, "module": "mlp.gate_proj", "avg_loss": 3910.012648809524, "time": 2.550257921218872}, {"layer": 50, "module": "mlp.down_proj", "avg_loss": 1849.389136904762, "time": 7.23482608795166}, {"layer": 51, "module": "self_attn.k_proj", "avg_loss": 165.87076822916666, "time": 2.7653841972351074}, {"layer": 51, "module": "self_attn.v_proj", "avg_loss": 322.88160342261904, "time": 2.21199893951416}, {"layer": 51, "module": "self_attn.q_proj", "avg_loss": 1026.5469990079366, "time": 2.159925699234009}, {"layer": 51, "module": "self_attn.o_proj", "avg_loss": 139.60396515376985, "time": 2.5704996585845947}, {"layer": 51, "module": "mlp.up_proj", "avg_loss": 4348.342757936508, "time": 2.276313304901123}, {"layer": 51, "module": "mlp.gate_proj", "avg_loss": 4147.024057539683, "time": 2.3875865936279297}, {"layer": 51, "module": "mlp.down_proj", "avg_loss": 1985.6436011904761, "time": 6.95845103263855}, {"layer": 52, "module": "self_attn.k_proj", "avg_loss": 190.30217633928572, "time": 2.1422195434570312}, {"layer": 52, "module": "self_attn.v_proj", "avg_loss": 372.58327132936506, "time": 2.2324423789978027}, {"layer": 52, "module": "self_attn.q_proj", "avg_loss": 1219.0659722222222, "time": 2.2752833366394043}, {"layer": 52, "module": "self_attn.o_proj", "avg_loss": 220.32500930059524, "time": 2.502460479736328}, {"layer": 52, "module": "mlp.up_proj", "avg_loss": 4602.40128968254, "time": 3.1032702922821045}, {"layer": 52, "module": "mlp.gate_proj", "avg_loss": 4355.164682539683, "time": 2.730194568634033}, {"layer": 52, "module": "mlp.down_proj", "avg_loss": 2125.9375, "time": 7.2058937549591064}, {"layer": 53, "module": "self_attn.k_proj", "avg_loss": 178.00337921626985, "time": 2.326580762863159}, {"layer": 53, "module": "self_attn.v_proj", "avg_loss": 348.53769841269843, "time": 2.2728264331817627}, {"layer": 53, "module": "self_attn.q_proj", "avg_loss": 1102.685515873016, "time": 2.5316574573516846}, {"layer": 53, "module": "self_attn.o_proj", "avg_loss": 133.4817088293651, "time": 2.2939693927764893}, {"layer": 53, "module": "mlp.up_proj", "avg_loss": 4830.659722222223, "time": 2.645214796066284}, {"layer": 53, "module": "mlp.gate_proj", "avg_loss": 4548.589285714285, "time": 2.4593372344970703}, {"layer": 53, "module": "mlp.down_proj", "avg_loss": 2270.096726190476, "time": 7.737048864364624}, {"layer": 54, "module": "self_attn.k_proj", "avg_loss": 193.93629092261904, "time": 2.502086639404297}, {"layer": 54, "module": "self_attn.v_proj", "avg_loss": 444.83599950396825, "time": 2.6019043922424316}, {"layer": 54, "module": "self_attn.q_proj", "avg_loss": 1239.1459573412699, "time": 2.585387706756592}, {"layer": 54, "module": "self_attn.o_proj", "avg_loss": 178.5931609623016, "time": 2.2646780014038086}, {"layer": 54, "module": "mlp.up_proj", "avg_loss": 5092.600198412699, "time": 2.86997389793396}, {"layer": 54, "module": "mlp.gate_proj", "avg_loss": 4754.4578373015875, "time": 2.916710615158081}, {"layer": 54, "module": "mlp.down_proj", "avg_loss": 2416.8973214285716, "time": 7.564112901687622}, {"layer": 55, "module": "self_attn.k_proj", "avg_loss": 189.19401041666666, "time": 2.4526779651641846}, {"layer": 55, "module": "self_attn.v_proj", "avg_loss": 443.6829117063492, "time": 2.284388542175293}, {"layer": 55, "module": "self_attn.q_proj", "avg_loss": 1226.7890625, "time": 2.2727792263031006}, {"layer": 55, "module": "self_attn.o_proj", "avg_loss": 191.6318359375, "time": 2.1943600177764893}, {"layer": 55, "module": "mlp.up_proj", "avg_loss": 5305.555059523809, "time": 2.57393217086792}, {"layer": 55, "module": "mlp.gate_proj", "avg_loss": 4916.112103174603, "time": 2.293924331665039}, {"layer": 55, "module": "mlp.down_proj", "avg_loss": 2540.9598214285716, "time": 7.566238641738892}, {"layer": 56, "module": "self_attn.k_proj", "avg_loss": 193.98652963789684, "time": 2.574528694152832}, {"layer": 56, "module": "self_attn.v_proj", "avg_loss": 439.267330109127, "time": 2.5464837551116943}, {"layer": 56, "module": "self_attn.q_proj", "avg_loss": 1265.3353174603174, "time": 2.3206353187561035}, {"layer": 56, "module": "self_attn.o_proj", "avg_loss": 271.29265873015873, "time": 2.8428103923797607}, {"layer": 56, "module": "mlp.up_proj", "avg_loss": 5558.982142857143, "time": 2.321178436279297}, {"layer": 56, "module": "mlp.gate_proj", "avg_loss": 5102.157242063492, "time": 2.2676870822906494}, {"layer": 56, "module": "mlp.down_proj", "avg_loss": 2736.2261904761904, "time": 7.457844257354736}, {"layer": 57, "module": "self_attn.k_proj", "avg_loss": 187.62400793650792, "time": 2.517690658569336}, {"layer": 57, "module": "self_attn.v_proj", "avg_loss": 494.71478174603175, "time": 2.5542826652526855}, {"layer": 57, "module": "self_attn.q_proj", "avg_loss": 1303.9290674603174, "time": 2.4277503490448}, {"layer": 57, "module": "self_attn.o_proj", "avg_loss": 491.52179439484127, "time": 2.5104193687438965}, {"layer": 57, "module": "mlp.up_proj", "avg_loss": 5708.016865079365, "time": 2.4943971633911133}, {"layer": 57, "module": "mlp.gate_proj", "avg_loss": 5182.5297619047615, "time": 2.3458802700042725}, {"layer": 57, "module": "mlp.down_proj", "avg_loss": 3056.1183035714284, "time": 7.2268967628479}, {"layer": 58, "module": "self_attn.k_proj", "avg_loss": 156.90618799603175, "time": 2.2951223850250244}, {"layer": 58, "module": "self_attn.v_proj", "avg_loss": 317.41675967261904, "time": 2.2159953117370605}, {"layer": 58, "module": "self_attn.q_proj", "avg_loss": 950.9200148809524, "time": 2.2867612838745117}, {"layer": 58, "module": "self_attn.o_proj", "avg_loss": 200.71502976190476, "time": 2.3342673778533936}, {"layer": 58, "module": "mlp.up_proj", "avg_loss": 5801.086309523809, "time": 2.8814148902893066}, {"layer": 58, "module": "mlp.gate_proj", "avg_loss": 5174.054563492064, "time": 2.979644536972046}, {"layer": 58, "module": "mlp.down_proj", "avg_loss": 3398.2586805555557, "time": 7.378202438354492}, {"layer": 59, "module": "self_attn.k_proj", "avg_loss": 147.54904513888889, "time": 2.2939393520355225}, {"layer": 59, "module": "self_attn.v_proj", "avg_loss": 263.13795882936506, "time": 2.3206112384796143}, {"layer": 59, "module": "self_attn.q_proj", "avg_loss": 880.9408482142857, "time": 2.20632004737854}, {"layer": 59, "module": "self_attn.o_proj", "avg_loss": 213.16493055555554, "time": 2.1653060913085938}, {"layer": 59, "module": "mlp.up_proj", "avg_loss": 5770.462797619048, "time": 2.27760910987854}, {"layer": 59, "module": "mlp.gate_proj", "avg_loss": 5112.523313492064, "time": 2.501638889312744}, {"layer": 59, "module": "mlp.down_proj", "avg_loss": 4361.241567460317, "time": 7.145565032958984}, {"layer": 60, "module": "self_attn.k_proj", "avg_loss": 119.16657366071429, "time": 2.2371904850006104}, {"layer": 60, "module": "self_attn.v_proj", "avg_loss": 212.84351748511904, "time": 2.211812973022461}, {"layer": 60, "module": "self_attn.q_proj", "avg_loss": 709.5553075396825, "time": 2.194713830947876}, {"layer": 60, "module": "self_attn.o_proj", "avg_loss": 217.11297123015873, "time": 2.3864479064941406}, {"layer": 60, "module": "mlp.up_proj", "avg_loss": 5256.19246031746, "time": 2.857273817062378}, {"layer": 60, "module": "mlp.gate_proj", "avg_loss": 4872.780257936508, "time": 2.58553409576416}, {"layer": 60, "module": "mlp.down_proj", "avg_loss": 11553.51884920635, "time": 7.098440408706665}]