|
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
Loading checkpoint shards: 14%|ββ | 1/7 [00:11<01:09, 11.62s/it]
Loading checkpoint shards: 29%|βββ | 2/7 [00:23<00:58, 11.65s/it]
Loading checkpoint shards: 43%|βββββ | 3/7 [00:34<00:46, 11.67s/it]
Loading checkpoint shards: 57%|ββββββ | 4/7 [00:46<00:34, 11.64s/it]
Loading checkpoint shards: 71%|ββββββββ | 5/7 [00:58<00:23, 11.68s/it]
Loading checkpoint shards: 86%|βββββββββ | 6/7 [01:09<00:11, 11.64s/it]
Loading checkpoint shards: 100%|ββββββββββ| 7/7 [01:16<00:00, 10.09s/it]
Loading checkpoint shards: 100%|ββββββββββ| 7/7 [01:16<00:00, 10.97s/it] |
|
Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-6fbe877195f42de5/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
|
Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-efc3d4f4606f44bd/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
|
Starting ... |
|
Ready. |
|
0 self_attn.k_proj |
|
Quantizing ... |
|
time 5.59 |
|
error 248.51150512695312 |
|
0 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 23.832942962646484 |
|
0 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 208.9890899658203 |
|
0 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1.2797350883483887 |
|
0 mlp.up_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 85.32704162597656 |
|
0 mlp.gate_proj |
|
Quantizing ... |
|
time 4.67 |
|
error 92.8025131225586 |
|
0 mlp.down_proj |
|
Quantizing ... |
|
time 15.19 |
|
error 7.90690803527832 |
|
1 self_attn.k_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 491.87091064453125 |
|
1 self_attn.v_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 65.71781158447266 |
|
1 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 481.22857666015625 |
|
1 self_attn.o_proj |
|
Quantizing ... |
|
time 5.37 |
|
error 14.926952362060547 |
|
1 mlp.up_proj |
|
Quantizing ... |
|
time 5.54 |
|
error 569.1054077148438 |
|
1 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 651.648193359375 |
|
1 mlp.down_proj |
|
Quantizing ... |
|
time 15.20 |
|
error 52.38072204589844 |
|
2 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 1702.48681640625 |
|
2 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 328.9317321777344 |
|
2 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 1640.189453125 |
|
2 self_attn.o_proj |
|
Quantizing ... |
|
time 5.39 |
|
error 32.7197380065918 |
|
2 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 1899.3172607421875 |
|
2 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 2259.476806640625 |
|
2 mlp.down_proj |
|
Quantizing ... |
|
time 15.23 |
|
error 115.92791748046875 |
|
3 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 3070.20654296875 |
|
3 self_attn.v_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 640.16943359375 |
|
3 self_attn.q_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 2804.199462890625 |
|
3 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 56.04383850097656 |
|
3 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 3143.75732421875 |
|
3 mlp.gate_proj |
|
Quantizing ... |
|
time 4.70 |
|
error 3672.450439453125 |
|
3 mlp.down_proj |
|
Quantizing ... |
|
time 15.21 |
|
error 455.0036315917969 |
|
4 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 8661.1416015625 |
|
4 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 3130.848388671875 |
|
4 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 8500.8896484375 |
|
4 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 73.76594543457031 |
|
4 mlp.up_proj |
|
Quantizing ... |
|
time 5.55 |
|
error 4750.201171875 |
|
4 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 5532.53076171875 |
|
4 mlp.down_proj |
|
Quantizing ... |
|
time 15.24 |
|
error 274.5998229980469 |
|
5 self_attn.k_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 9668.17578125 |
|
5 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 3808.4091796875 |
|
5 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 9597.880859375 |
|
5 self_attn.o_proj |
|
Quantizing ... |
|
time 5.39 |
|
error 96.71366882324219 |
|
5 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 5799.3759765625 |
|
5 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 6807.18798828125 |
|
5 mlp.down_proj |
|
Quantizing ... |
|
time 15.21 |
|
error 389.222900390625 |
|
6 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 10976.859375 |
|
6 self_attn.v_proj |
|
Quantizing ... |
|
time 4.63 |
|
error 4781.712890625 |
|
6 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 11052.400390625 |
|
6 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 126.38148498535156 |
|
6 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 7372.54296875 |
|
6 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 8426.8515625 |
|
6 mlp.down_proj |
|
Quantizing ... |
|
time 15.25 |
|
error 516.6248779296875 |
|
7 self_attn.k_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 12607.513671875 |
|
7 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 5585.8876953125 |
|
7 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 12423.859375 |
|
7 self_attn.o_proj |
|
Quantizing ... |
|
time 5.37 |
|
error 216.31124877929688 |
|
7 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 8981.8994140625 |
|
7 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 10161.09375 |
|
7 mlp.down_proj |
|
Quantizing ... |
|
time 15.23 |
|
error 673.1290283203125 |
|
8 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 13563.486328125 |
|
8 self_attn.v_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 6292.990234375 |
|
8 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 13392.42578125 |
|
8 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 296.80194091796875 |
|
8 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 10729.47265625 |
|
8 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 12080.126953125 |
|
8 mlp.down_proj |
|
Quantizing ... |
|
time 15.19 |
|
error 850.3040161132812 |
|
9 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 13440.3017578125 |
|
9 self_attn.v_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 6355.3544921875 |
|
9 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 12908.4482421875 |
|
9 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 356.29302978515625 |
|
9 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 12409.3623046875 |
|
9 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 13892.767578125 |
|
9 mlp.down_proj |
|
Quantizing ... |
|
time 15.18 |
|
error 1032.557861328125 |
|
10 self_attn.k_proj |
|
Quantizing ... |
|
time 5.19 |
|
error 15417.884765625 |
|
10 self_attn.v_proj |
|
Quantizing ... |
|
time 4.50 |
|
error 7487.8876953125 |
|
10 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 14854.5859375 |
|
10 self_attn.o_proj |
|
Quantizing ... |
|
time 4.65 |
|
error 522.43212890625 |
|
10 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 13540.619140625 |
|
10 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 14910.919921875 |
|
10 mlp.down_proj |
|
Quantizing ... |
|
time 15.21 |
|
error 1451.3995361328125 |
|
11 self_attn.k_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 19503.8125 |
|
11 self_attn.v_proj |
|
Quantizing ... |
|
time 4.54 |
|
error 10578.271484375 |
|
11 self_attn.q_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 19087.310546875 |
|
11 self_attn.o_proj |
|
Quantizing ... |
|
time 5.29 |
|
error 460.1400146484375 |
|
11 mlp.up_proj |
|
Quantizing ... |
|
time 5.46 |
|
error 15259.3271484375 |
|
11 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 16558.69921875 |
|
11 mlp.down_proj |
|
Quantizing ... |
|
time 15.07 |
|
error 1355.914794921875 |
|
12 self_attn.k_proj |
|
Quantizing ... |
|
time 5.46 |
|
error 19905.421875 |
|
12 self_attn.v_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 10467.2255859375 |
|
12 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 19042.11328125 |
|
12 self_attn.o_proj |
|
Quantizing ... |
|
time 5.30 |
|
error 554.3314208984375 |
|
12 mlp.up_proj |
|
Quantizing ... |
|
time 5.46 |
|
error 16581.44921875 |
|
12 mlp.gate_proj |
|
Quantizing ... |
|
time 4.63 |
|
error 17876.6640625 |
|
12 mlp.down_proj |
|
Quantizing ... |
|
time 14.99 |
|
error 1520.2266845703125 |
|
13 self_attn.k_proj |
|
Quantizing ... |
|
time 5.40 |
|
error 18666.96875 |
|
13 self_attn.v_proj |
|
Quantizing ... |
|
time 4.50 |
|
error 9848.2939453125 |
|
13 self_attn.q_proj |
|
Quantizing ... |
|
time 4.48 |
|
error 17859.89453125 |
|
13 self_attn.o_proj |
|
Quantizing ... |
|
time 5.27 |
|
error 736.5158081054688 |
|
13 mlp.up_proj |
|
Quantizing ... |
|
time 5.44 |
|
error 17094.962890625 |
|
13 mlp.gate_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 17981.009765625 |
|
13 mlp.down_proj |
|
Quantizing ... |
|
time 14.99 |
|
error 1649.564697265625 |
|
14 self_attn.k_proj |
|
Quantizing ... |
|
time 5.43 |
|
error 19464.08984375 |
|
14 self_attn.v_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 10684.158203125 |
|
14 self_attn.q_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 18777.158203125 |
|
14 self_attn.o_proj |
|
Quantizing ... |
|
time 5.27 |
|
error 666.4788818359375 |
|
14 mlp.up_proj |
|
Quantizing ... |
|
time 5.43 |
|
error 18003.3359375 |
|
14 mlp.gate_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 18659.9375 |
|
14 mlp.down_proj |
|
Quantizing ... |
|
time 14.99 |
|
error 1756.21533203125 |
|
15 self_attn.k_proj |
|
Quantizing ... |
|
time 5.42 |
|
error 22731.232421875 |
|
15 self_attn.v_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 13153.8212890625 |
|
15 self_attn.q_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 22046.060546875 |
|
15 self_attn.o_proj |
|
Quantizing ... |
|
time 5.33 |
|
error 927.9457397460938 |
|
15 mlp.up_proj |
|
Quantizing ... |
|
time 5.49 |
|
error 18842.2421875 |
|
15 mlp.gate_proj |
|
Quantizing ... |
|
time 4.64 |
|
error 19572.6171875 |
|
15 mlp.down_proj |
|
Quantizing ... |
|
time 15.11 |
|
error 2022.401123046875 |
|
16 self_attn.k_proj |
|
Quantizing ... |
|
time 5.48 |
|
error 23348.453125 |
|
16 self_attn.v_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 14359.458984375 |
|
16 self_attn.q_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 22662.978515625 |
|
16 self_attn.o_proj |
|
Quantizing ... |
|
time 5.32 |
|
error 903.6290893554688 |
|
16 mlp.up_proj |
|
Quantizing ... |
|
time 5.49 |
|
error 20053.740234375 |
|
16 mlp.gate_proj |
|
Quantizing ... |
|
time 4.64 |
|
error 20500.84765625 |
|
16 mlp.down_proj |
|
Quantizing ... |
|
time 15.12 |
|
error 2178.75537109375 |
|
17 self_attn.k_proj |
|
Quantizing ... |
|
time 5.48 |
|
error 22102.595703125 |
|
17 self_attn.v_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 13796.431640625 |
|
17 self_attn.q_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 21351.3203125 |
|
17 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 914.3590087890625 |
|
17 mlp.up_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 21227.931640625 |
|
17 mlp.gate_proj |
|
Quantizing ... |
|
time 4.66 |
|
error 21392.0234375 |
|
17 mlp.down_proj |
|
Quantizing ... |
|
time 15.16 |
|
error 2325.583740234375 |
|
18 self_attn.k_proj |
|
Quantizing ... |
|
time 5.49 |
|
error 23322.21484375 |
|
18 self_attn.v_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 14129.474609375 |
|
18 self_attn.q_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 22335.5390625 |
|
18 self_attn.o_proj |
|
Quantizing ... |
|
time 5.32 |
|
error 1253.7630615234375 |
|
18 mlp.up_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 22060.33203125 |
|
18 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 22286.359375 |
|
18 mlp.down_proj |
|
Quantizing ... |
|
time 15.18 |
|
error 2743.5048828125 |
|
19 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 22394.88671875 |
|
19 self_attn.v_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 14984.123046875 |
|
19 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 21649.625 |
|
19 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 1130.92822265625 |
|
19 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 23244.22265625 |
|
19 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 23601.015625 |
|
19 mlp.down_proj |
|
Quantizing ... |
|
time 15.26 |
|
error 3028.933349609375 |
|
20 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 22318.123046875 |
|
20 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 15997.8583984375 |
|
20 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 21778.65234375 |
|
20 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 1066.665283203125 |
|
20 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 25091.755859375 |
|
20 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 25606.6796875 |
|
20 mlp.down_proj |
|
Quantizing ... |
|
time 15.15 |
|
error 3200.33935546875 |
|
21 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 22088.099609375 |
|
21 self_attn.v_proj |
|
Quantizing ... |
|
time 4.45 |
|
error 15016.916015625 |
|
21 self_attn.q_proj |
|
Quantizing ... |
|
time 3.92 |
|
error 21347.455078125 |
|
21 self_attn.o_proj |
|
Quantizing ... |
|
time 4.65 |
|
error 1443.336669921875 |
|
21 mlp.up_proj |
|
Quantizing ... |
|
time 4.79 |
|
error 25449.8359375 |
|
21 mlp.gate_proj |
|
Quantizing ... |
|
time 3.96 |
|
error 25958.3515625 |
|
21 mlp.down_proj |
|
Quantizing ... |
|
time 13.25 |
|
error 3697.61669921875 |
|
22 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 17819.23046875 |
|
22 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 14577.216796875 |
|
22 self_attn.q_proj |
|
Quantizing ... |
|
time 4.57 |
|
error 17515.81640625 |
|
22 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1200.3115234375 |
|
22 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 27206.17578125 |
|
22 mlp.gate_proj |
|
Quantizing ... |
|
time 4.66 |
|
error 27972.80859375 |
|
22 mlp.down_proj |
|
Quantizing ... |
|
time 15.22 |
|
error 4049.857177734375 |
|
23 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 22719.404296875 |
|
23 self_attn.v_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 17897.3828125 |
|
23 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 22223.16015625 |
|
23 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 1186.746826171875 |
|
23 mlp.up_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 28716.908203125 |
|
23 mlp.gate_proj |
|
Quantizing ... |
|
time 4.67 |
|
error 29901.70703125 |
|
23 mlp.down_proj |
|
Quantizing ... |
|
time 15.15 |
|
error 4423.9609375 |
|
24 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 21154.1796875 |
|
24 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 17285.16015625 |
|
24 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 20868.7890625 |
|
24 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1467.7769775390625 |
|
24 mlp.up_proj |
|
Quantizing ... |
|
time 5.48 |
|
error 29462.51953125 |
|
24 mlp.gate_proj |
|
Quantizing ... |
|
time 4.66 |
|
error 30764.58984375 |
|
24 mlp.down_proj |
|
Quantizing ... |
|
time 15.18 |
|
error 4902.71728515625 |
|
25 self_attn.k_proj |
|
Quantizing ... |
|
time 5.49 |
|
error 22048.046875 |
|
25 self_attn.v_proj |
|
Quantizing ... |
|
time 4.57 |
|
error 16228.04296875 |
|
25 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 21143.0 |
|
25 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 2139.94775390625 |
|
25 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 30151.56640625 |
|
25 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 32173.623046875 |
|
25 mlp.down_proj |
|
Quantizing ... |
|
time 15.19 |
|
error 5837.19970703125 |
|
26 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 23134.4375 |
|
26 self_attn.v_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 20564.1015625 |
|
26 self_attn.q_proj |
|
Quantizing ... |
|
time 4.57 |
|
error 22738.328125 |
|
26 self_attn.o_proj |
|
Quantizing ... |
|
time 5.32 |
|
error 1489.8433837890625 |
|
26 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 32375.87890625 |
|
26 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 35263.8046875 |
|
26 mlp.down_proj |
|
Quantizing ... |
|
time 15.16 |
|
error 6332.103515625 |
|
27 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 18952.2265625 |
|
27 self_attn.v_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 17621.71875 |
|
27 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 18678.576171875 |
|
27 self_attn.o_proj |
|
Quantizing ... |
|
time 5.33 |
|
error 948.7327880859375 |
|
27 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 34296.54296875 |
|
27 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 38023.46875 |
|
27 mlp.down_proj |
|
Quantizing ... |
|
time 15.14 |
|
error 6713.2685546875 |
|
28 self_attn.k_proj |
|
Quantizing ... |
|
time 5.44 |
|
error 21827.68359375 |
|
28 self_attn.v_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 21026.181640625 |
|
28 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 21629.984375 |
|
28 self_attn.o_proj |
|
Quantizing ... |
|
time 5.28 |
|
error 1200.962158203125 |
|
28 mlp.up_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 35941.671875 |
|
28 mlp.gate_proj |
|
Quantizing ... |
|
time 3.89 |
|
error 40337.671875 |
|
28 mlp.down_proj |
|
Quantizing ... |
|
time 13.09 |
|
error 7115.2939453125 |
|
29 self_attn.k_proj |
|
Quantizing ... |
|
time 4.71 |
|
error 18534.94921875 |
|
29 self_attn.v_proj |
|
Quantizing ... |
|
time 3.81 |
|
error 18983.3828125 |
|
29 self_attn.q_proj |
|
Quantizing ... |
|
time 3.79 |
|
error 18497.5078125 |
|
29 self_attn.o_proj |
|
Quantizing ... |
|
time 4.56 |
|
error 1055.707763671875 |
|
29 mlp.up_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 37311.1328125 |
|
29 mlp.gate_proj |
|
Quantizing ... |
|
time 3.89 |
|
error 42287.8046875 |
|
29 mlp.down_proj |
|
Quantizing ... |
|
time 13.09 |
|
error 7297.30322265625 |
|
30 self_attn.k_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 19803.091796875 |
|
30 self_attn.v_proj |
|
Quantizing ... |
|
time 3.82 |
|
error 19785.2578125 |
|
30 self_attn.q_proj |
|
Quantizing ... |
|
time 3.79 |
|
error 19760.001953125 |
|
30 self_attn.o_proj |
|
Quantizing ... |
|
time 4.54 |
|
error 1469.172119140625 |
|
30 mlp.up_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 38499.63671875 |
|
30 mlp.gate_proj |
|
Quantizing ... |
|
time 3.89 |
|
error 44179.5 |
|
30 mlp.down_proj |
|
Quantizing ... |
|
time 13.09 |
|
error 7580.81689453125 |
|
31 self_attn.k_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 17820.888671875 |
|
31 self_attn.v_proj |
|
Quantizing ... |
|
time 3.81 |
|
error 18094.15234375 |
|
31 self_attn.q_proj |
|
Quantizing ... |
|
time 3.79 |
|
error 17839.12890625 |
|
31 self_attn.o_proj |
|
Quantizing ... |
|
time 4.55 |
|
error 865.2473754882812 |
|
31 mlp.up_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 39930.4765625 |
|
31 mlp.gate_proj |
|
Quantizing ... |
|
time 3.90 |
|
error 46099.22265625 |
|
31 mlp.down_proj |
|
Quantizing ... |
|
time 13.07 |
|
error 7856.654296875 |
|
32 self_attn.k_proj |
|
Quantizing ... |
|
time 4.72 |
|
error 17320.09375 |
|
32 self_attn.v_proj |
|
Quantizing ... |
|
time 3.81 |
|
error 18277.00390625 |
|
32 self_attn.q_proj |
|
Quantizing ... |
|
time 3.78 |
|
error 17301.69140625 |
|
32 self_attn.o_proj |
|
Quantizing ... |
|
time 4.55 |
|
error 1483.154052734375 |
|
32 mlp.up_proj |
|
Quantizing ... |
|
time 4.73 |
|
error 40772.7578125 |
|
32 mlp.gate_proj |
|
Quantizing ... |
|
time 3.91 |
|
error 47268.8828125 |
|
32 mlp.down_proj |
|
Quantizing ... |
|
time 15.05 |
|
error 8160.94189453125 |
|
33 self_attn.k_proj |
|
Quantizing ... |
|
time 5.43 |
|
error 21955.51953125 |
|
33 self_attn.v_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 21049.22265625 |
|
33 self_attn.q_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 21577.25390625 |
|
33 self_attn.o_proj |
|
Quantizing ... |
|
time 5.28 |
|
error 1723.208251953125 |
|
33 mlp.up_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 41624.625 |
|
33 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 48574.9140625 |
|
33 mlp.down_proj |
|
Quantizing ... |
|
time 15.02 |
|
error 8516.169921875 |
|
34 self_attn.k_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 21276.6171875 |
|
34 self_attn.v_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 21605.98828125 |
|
34 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 21282.73828125 |
|
34 self_attn.o_proj |
|
Quantizing ... |
|
time 5.29 |
|
error 1147.412109375 |
|
34 mlp.up_proj |
|
Quantizing ... |
|
time 5.47 |
|
error 43007.8515625 |
|
34 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 50336.4453125 |
|
34 mlp.down_proj |
|
Quantizing ... |
|
time 15.03 |
|
error 8801.716796875 |
|
35 self_attn.k_proj |
|
Quantizing ... |
|
time 5.42 |
|
error 19654.30078125 |
|
35 self_attn.v_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 20460.298828125 |
|
35 self_attn.q_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 19565.66015625 |
|
35 self_attn.o_proj |
|
Quantizing ... |
|
time 5.28 |
|
error 1267.8861083984375 |
|
35 mlp.up_proj |
|
Quantizing ... |
|
time 5.44 |
|
error 43844.3984375 |
|
35 mlp.gate_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 51832.9296875 |
|
35 mlp.down_proj |
|
Quantizing ... |
|
time 14.99 |
|
error 9246.138671875 |
|
36 self_attn.k_proj |
|
Quantizing ... |
|
time 5.42 |
|
error 21818.0546875 |
|
36 self_attn.v_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 22970.7421875 |
|
36 self_attn.q_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 21755.69140625 |
|
36 self_attn.o_proj |
|
Quantizing ... |
|
time 5.27 |
|
error 1475.30712890625 |
|
36 mlp.up_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 45133.0 |
|
36 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 53537.16796875 |
|
36 mlp.down_proj |
|
Quantizing ... |
|
time 15.01 |
|
error 9547.609375 |
|
37 self_attn.k_proj |
|
Quantizing ... |
|
time 5.42 |
|
error 19679.92578125 |
|
37 self_attn.v_proj |
|
Quantizing ... |
|
time 4.49 |
|
error 21583.8515625 |
|
37 self_attn.q_proj |
|
Quantizing ... |
|
time 4.46 |
|
error 19695.486328125 |
|
37 self_attn.o_proj |
|
Quantizing ... |
|
time 5.27 |
|
error 1159.252197265625 |
|
37 mlp.up_proj |
|
Quantizing ... |
|
time 5.47 |
|
error 46412.2890625 |
|
37 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 54849.98828125 |
|
37 mlp.down_proj |
|
Quantizing ... |
|
time 15.02 |
|
error 9819.6611328125 |
|
38 self_attn.k_proj |
|
Quantizing ... |
|
time 5.44 |
|
error 18576.4296875 |
|
38 self_attn.v_proj |
|
Quantizing ... |
|
time 4.53 |
|
error 22137.95703125 |
|
38 self_attn.q_proj |
|
Quantizing ... |
|
time 4.50 |
|
error 18715.9453125 |
|
38 self_attn.o_proj |
|
Quantizing ... |
|
time 5.27 |
|
error 1157.175537109375 |
|
38 mlp.up_proj |
|
Quantizing ... |
|
time 5.47 |
|
error 47867.8359375 |
|
38 mlp.gate_proj |
|
Quantizing ... |
|
time 4.63 |
|
error 56564.65625 |
|
38 mlp.down_proj |
|
Quantizing ... |
|
time 15.05 |
|
error 10064.912109375 |
|
39 self_attn.k_proj |
|
Quantizing ... |
|
time 5.43 |
|
error 18319.57421875 |
|
39 self_attn.v_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 21200.6953125 |
|
39 self_attn.q_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 18492.78515625 |
|
39 self_attn.o_proj |
|
Quantizing ... |
|
time 5.28 |
|
error 871.6390380859375 |
|
39 mlp.up_proj |
|
Quantizing ... |
|
time 5.49 |
|
error 49243.14453125 |
|
39 mlp.gate_proj |
|
Quantizing ... |
|
time 4.64 |
|
error 57997.484375 |
|
39 mlp.down_proj |
|
Quantizing ... |
|
time 15.02 |
|
error 10489.1953125 |
|
40 self_attn.k_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 19263.89453125 |
|
40 self_attn.v_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 23175.212890625 |
|
40 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 19302.951171875 |
|
40 self_attn.o_proj |
|
Quantizing ... |
|
time 5.31 |
|
error 1094.509033203125 |
|
40 mlp.up_proj |
|
Quantizing ... |
|
time 5.48 |
|
error 50629.12109375 |
|
40 mlp.gate_proj |
|
Quantizing ... |
|
time 4.63 |
|
error 59236.71875 |
|
40 mlp.down_proj |
|
Quantizing ... |
|
time 15.04 |
|
error 10625.0810546875 |
|
41 self_attn.k_proj |
|
Quantizing ... |
|
time 5.45 |
|
error 16903.75 |
|
41 self_attn.v_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 20939.875 |
|
41 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 17243.86328125 |
|
41 self_attn.o_proj |
|
Quantizing ... |
|
time 5.28 |
|
error 1089.82421875 |
|
41 mlp.up_proj |
|
Quantizing ... |
|
time 5.46 |
|
error 51843.125 |
|
41 mlp.gate_proj |
|
Quantizing ... |
|
time 4.63 |
|
error 60519.8515625 |
|
41 mlp.down_proj |
|
Quantizing ... |
|
time 15.00 |
|
error 10861.064453125 |
|
42 self_attn.k_proj |
|
Quantizing ... |
|
time 5.42 |
|
error 15238.4775390625 |
|
42 self_attn.v_proj |
|
Quantizing ... |
|
time 4.51 |
|
error 18795.26171875 |
|
42 self_attn.q_proj |
|
Quantizing ... |
|
time 4.52 |
|
error 15420.541015625 |
|
42 self_attn.o_proj |
|
Quantizing ... |
|
time 5.29 |
|
error 784.9866943359375 |
|
42 mlp.up_proj |
|
Quantizing ... |
|
time 5.46 |
|
error 53463.21875 |
|
42 mlp.gate_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 62206.3515625 |
|
42 mlp.down_proj |
|
Quantizing ... |
|
time 15.02 |
|
error 11189.00390625 |
|
43 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 20108.013671875 |
|
43 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 23657.625 |
|
43 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 20269.19140625 |
|
43 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1607.70849609375 |
|
43 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 55084.65234375 |
|
43 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 64042.7734375 |
|
43 mlp.down_proj |
|
Quantizing ... |
|
time 15.19 |
|
error 11612.2919921875 |
|
44 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 17356.1171875 |
|
44 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 21160.546875 |
|
44 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 17603.23828125 |
|
44 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 1255.604248046875 |
|
44 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 56339.96875 |
|
44 mlp.gate_proj |
|
Quantizing ... |
|
time 4.70 |
|
error 65163.125 |
|
44 mlp.down_proj |
|
Quantizing ... |
|
time 15.20 |
|
error 12064.7138671875 |
|
45 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 18117.7890625 |
|
45 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 21808.15625 |
|
45 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 18116.814453125 |
|
45 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1263.494140625 |
|
45 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 57673.00390625 |
|
45 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 66421.8984375 |
|
45 mlp.down_proj |
|
Quantizing ... |
|
time 15.22 |
|
error 12156.66015625 |
|
46 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 14839.7373046875 |
|
46 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 18257.892578125 |
|
46 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 15213.90625 |
|
46 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 785.2659912109375 |
|
46 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 58797.3125 |
|
46 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 67662.4375 |
|
46 mlp.down_proj |
|
Quantizing ... |
|
time 15.22 |
|
error 12578.89453125 |
|
47 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 15980.6748046875 |
|
47 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 20399.40625 |
|
47 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 16314.845703125 |
|
47 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 994.2510375976562 |
|
47 mlp.up_proj |
|
Quantizing ... |
|
time 5.55 |
|
error 60194.90625 |
|
47 mlp.gate_proj |
|
Quantizing ... |
|
time 4.69 |
|
error 69083.8046875 |
|
47 mlp.down_proj |
|
Quantizing ... |
|
time 15.22 |
|
error 12717.556640625 |
|
48 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 16732.21484375 |
|
48 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 20331.4609375 |
|
48 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 16550.6796875 |
|
48 self_attn.o_proj |
|
Quantizing ... |
|
time 5.37 |
|
error 830.1424560546875 |
|
48 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 61524.6640625 |
|
48 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 69989.859375 |
|
48 mlp.down_proj |
|
Quantizing ... |
|
time 15.25 |
|
error 12867.81640625 |
|
49 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 20927.19921875 |
|
49 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 26998.1875 |
|
49 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 21068.333984375 |
|
49 self_attn.o_proj |
|
Quantizing ... |
|
time 5.33 |
|
error 2200.43896484375 |
|
49 mlp.up_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 62553.7578125 |
|
49 mlp.gate_proj |
|
Quantizing ... |
|
time 4.68 |
|
error 70532.5 |
|
49 mlp.down_proj |
|
Quantizing ... |
|
time 15.24 |
|
error 13343.169921875 |
|
50 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 19778.150390625 |
|
50 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 24383.1171875 |
|
50 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 19944.087890625 |
|
50 self_attn.o_proj |
|
Quantizing ... |
|
time 5.34 |
|
error 1318.1099853515625 |
|
50 mlp.up_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 63662.0 |
|
50 mlp.gate_proj |
|
Quantizing ... |
|
time 4.71 |
|
error 70943.359375 |
|
50 mlp.down_proj |
|
Quantizing ... |
|
time 15.23 |
|
error 13458.142578125 |
|
51 self_attn.k_proj |
|
Quantizing ... |
|
time 5.51 |
|
error 19395.578125 |
|
51 self_attn.v_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 23529.4296875 |
|
51 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 19515.13671875 |
|
51 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1308.77294921875 |
|
51 mlp.up_proj |
|
Quantizing ... |
|
time 5.54 |
|
error 64472.87890625 |
|
51 mlp.gate_proj |
|
Quantizing ... |
|
time 4.70 |
|
error 71128.5078125 |
|
51 mlp.down_proj |
|
Quantizing ... |
|
time 15.24 |
|
error 13829.0478515625 |
|
52 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 15992.076171875 |
|
52 self_attn.v_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 19542.921875 |
|
52 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 16278.9853515625 |
|
52 self_attn.o_proj |
|
Quantizing ... |
|
time 5.36 |
|
error 1193.5657958984375 |
|
52 mlp.up_proj |
|
Quantizing ... |
|
time 5.54 |
|
error 64697.9375 |
|
52 mlp.gate_proj |
|
Quantizing ... |
|
time 4.71 |
|
error 70637.9453125 |
|
52 mlp.down_proj |
|
Quantizing ... |
|
time 15.26 |
|
error 14138.74609375 |
|
53 self_attn.k_proj |
|
Quantizing ... |
|
time 5.52 |
|
error 15789.552734375 |
|
53 self_attn.v_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 18145.87109375 |
|
53 self_attn.q_proj |
|
Quantizing ... |
|
time 4.59 |
|
error 15677.279296875 |
|
53 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 987.3407592773438 |
|
53 mlp.up_proj |
|
Quantizing ... |
|
time 5.54 |
|
error 64783.2578125 |
|
53 mlp.gate_proj |
|
Quantizing ... |
|
time 4.70 |
|
error 70183.8125 |
|
53 mlp.down_proj |
|
Quantizing ... |
|
time 15.15 |
|
error 14546.9462890625 |
|
54 self_attn.k_proj |
|
Quantizing ... |
|
time 5.57 |
|
error 17387.84375 |
|
54 self_attn.v_proj |
|
Quantizing ... |
|
time 4.57 |
|
error 21607.25390625 |
|
54 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 17343.09375 |
|
54 self_attn.o_proj |
|
Quantizing ... |
|
time 5.33 |
|
error 1196.340576171875 |
|
54 mlp.up_proj |
|
Quantizing ... |
|
time 5.55 |
|
error 65141.66015625 |
|
54 mlp.gate_proj |
|
Quantizing ... |
|
time 4.72 |
|
error 69816.265625 |
|
54 mlp.down_proj |
|
Quantizing ... |
|
time 15.27 |
|
error 14795.326171875 |
|
55 self_attn.k_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 17946.92578125 |
|
55 self_attn.v_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 21772.1796875 |
|
55 self_attn.q_proj |
|
Quantizing ... |
|
time 4.61 |
|
error 18037.7890625 |
|
55 self_attn.o_proj |
|
Quantizing ... |
|
time 5.37 |
|
error 1498.04150390625 |
|
55 mlp.up_proj |
|
Quantizing ... |
|
time 5.55 |
|
error 65091.9140625 |
|
55 mlp.gate_proj |
|
Quantizing ... |
|
time 4.71 |
|
error 69162.703125 |
|
55 mlp.down_proj |
|
Quantizing ... |
|
time 15.27 |
|
error 16282.494140625 |
|
56 self_attn.k_proj |
|
Quantizing ... |
|
time 5.53 |
|
error 15704.654296875 |
|
56 self_attn.v_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 21285.42578125 |
|
56 self_attn.q_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 15670.8251953125 |
|
56 self_attn.o_proj |
|
Quantizing ... |
|
time 5.38 |
|
error 2344.0654296875 |
|
56 mlp.up_proj |
|
Quantizing ... |
|
time 5.56 |
|
error 64253.71875 |
|
56 mlp.gate_proj |
|
Quantizing ... |
|
time 4.72 |
|
error 67705.203125 |
|
56 mlp.down_proj |
|
Quantizing ... |
|
time 15.32 |
|
error 18808.955078125 |
|
57 self_attn.k_proj |
|
Quantizing ... |
|
time 5.54 |
|
error 15364.15234375 |
|
57 self_attn.v_proj |
|
Quantizing ... |
|
time 4.62 |
|
error 17928.765625 |
|
57 self_attn.q_proj |
|
Quantizing ... |
|
time 4.60 |
|
error 15470.736328125 |
|
57 self_attn.o_proj |
|
Quantizing ... |
|
time 5.38 |
|
error 1377.28271484375 |
|
57 mlp.up_proj |
|
Quantizing ... |
|
time 5.55 |
|
error 61965.84375 |
|
57 mlp.gate_proj |
|
Quantizing ... |
|
time 4.72 |
|
error 65122.2578125 |
|
57 mlp.down_proj |
|
Quantizing ... |
|
time 15.28 |
|
error 23356.546875 |
|
58 self_attn.k_proj |
|
Quantizing ... |
|
time 5.50 |
|
error 12949.615234375 |
|
58 self_attn.v_proj |
|
Quantizing ... |
|
time 4.57 |
|
error 14125.650390625 |
|
58 self_attn.q_proj |
|
Quantizing ... |
|
time 4.58 |
|
error 13172.3046875 |
|
58 self_attn.o_proj |
|
Quantizing ... |
|
time 5.35 |
|
error 1190.738037109375 |
|
58 mlp.up_proj |
|
Quantizing ... |
|
time 4.92 |
|
error 56147.046875 |
|
58 mlp.gate_proj |
|
Quantizing ... |
|
time 4.08 |
|
error 60298.77734375 |
|
58 mlp.down_proj |
|
Quantizing ... |
|
time 13.60 |
|
error 31925.26953125 |
|
59 self_attn.k_proj |
|
Quantizing ... |
|
time 4.89 |
|
error 11584.0322265625 |
|
59 self_attn.v_proj |
|
Quantizing ... |
|
time 3.98 |
|
error 10727.130859375 |
|
59 self_attn.q_proj |
|
Quantizing ... |
|
time 3.98 |
|
error 11997.470703125 |
|
59 self_attn.o_proj |
|
Quantizing ... |
|
time 4.87 |
|
error 2141.62890625 |
|
59 mlp.up_proj |
|
Quantizing ... |
|
time 5.01 |
|
error 43433.7265625 |
|
59 mlp.gate_proj |
|
Quantizing ... |
|
time 4.22 |
|
error 46841.99609375 |
|
59 mlp.down_proj |
|
Quantizing ... |
|
time 13.90 |
|
error 51444.9765625 |
|
4572.00866651535 |
|
Found cached dataset wikitext (/home/usbhost/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
|
Found cached dataset wikitext (/home/usbhost/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
|
wikitext2 |
|
Evaluating ... |
|
0 |
|
1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
4.230341911315918 |
|
Found cached dataset ptb_text_only (/home/usbhost/.cache/huggingface/datasets/ptb_text_only/penn_treebank/1.1.0/8d1b97746fb9765d140e569ec5ddd35e20af4d37761f5e1bf357ea0b081f2c1f) |
|
Found cached dataset ptb_text_only (/home/usbhost/.cache/huggingface/datasets/ptb_text_only/penn_treebank/1.1.0/8d1b97746fb9765d140e569ec5ddd35e20af4d37761f5e1bf357ea0b081f2c1f) |
|
ptb-new |
|
Evaluating ... |
|
0 |
|
1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
8.243087768554688 |
|
Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-6fbe877195f42de5/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
|
Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-efc3d4f4606f44bd/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
|
c4-new |
|
Evaluating ... |
|
0 |
|
1 |
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 |
|
37 |
|
38 |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 |
|
44 |
|
45 |
|
46 |
|
47 |
|
48 |
|
49 |
|
50 |
|
51 |
|
52 |
|
53 |
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
59 |
|
6.231330394744873 |
|
Packing ... |
|
model.layers.0.self_attn.k_proj |
|
model.layers.0.self_attn.o_proj |
|
model.layers.0.self_attn.q_proj |
|
model.layers.0.self_attn.v_proj |
|
model.layers.0.mlp.down_proj |
|
model.layers.0.mlp.gate_proj |
|
model.layers.0.mlp.up_proj |
|
model.layers.1.self_attn.k_proj |
|
model.layers.1.self_attn.o_proj |
|
model.layers.1.self_attn.q_proj |
|
model.layers.1.self_attn.v_proj |
|
model.layers.1.mlp.down_proj |
|
model.layers.1.mlp.gate_proj |
|
model.layers.1.mlp.up_proj |
|
model.layers.2.self_attn.k_proj |
|
model.layers.2.self_attn.o_proj |
|
model.layers.2.self_attn.q_proj |
|
model.layers.2.self_attn.v_proj |
|
model.layers.2.mlp.down_proj |
|
model.layers.2.mlp.gate_proj |
|
model.layers.2.mlp.up_proj |
|
model.layers.3.self_attn.k_proj |
|
model.layers.3.self_attn.o_proj |
|
model.layers.3.self_attn.q_proj |
|
model.layers.3.self_attn.v_proj |
|
model.layers.3.mlp.down_proj |
|
model.layers.3.mlp.gate_proj |
|
model.layers.3.mlp.up_proj |
|
model.layers.4.self_attn.k_proj |
|
model.layers.4.self_attn.o_proj |
|
model.layers.4.self_attn.q_proj |
|
model.layers.4.self_attn.v_proj |
|
model.layers.4.mlp.down_proj |
|
model.layers.4.mlp.gate_proj |
|
model.layers.4.mlp.up_proj |
|
model.layers.5.self_attn.k_proj |
|
model.layers.5.self_attn.o_proj |
|
model.layers.5.self_attn.q_proj |
|
model.layers.5.self_attn.v_proj |
|
model.layers.5.mlp.down_proj |
|
model.layers.5.mlp.gate_proj |
|
model.layers.5.mlp.up_proj |
|
model.layers.6.self_attn.k_proj |
|
model.layers.6.self_attn.o_proj |
|
model.layers.6.self_attn.q_proj |
|
model.layers.6.self_attn.v_proj |
|
model.layers.6.mlp.down_proj |
|
model.layers.6.mlp.gate_proj |
|
model.layers.6.mlp.up_proj |
|
model.layers.7.self_attn.k_proj |
|
model.layers.7.self_attn.o_proj |
|
model.layers.7.self_attn.q_proj |
|
model.layers.7.self_attn.v_proj |
|
model.layers.7.mlp.down_proj |
|
model.layers.7.mlp.gate_proj |
|
model.layers.7.mlp.up_proj |
|
model.layers.8.self_attn.k_proj |
|
model.layers.8.self_attn.o_proj |
|
model.layers.8.self_attn.q_proj |
|
model.layers.8.self_attn.v_proj |
|
model.layers.8.mlp.down_proj |
|
model.layers.8.mlp.gate_proj |
|
model.layers.8.mlp.up_proj |
|
model.layers.9.self_attn.k_proj |
|
model.layers.9.self_attn.o_proj |
|
model.layers.9.self_attn.q_proj |
|
model.layers.9.self_attn.v_proj |
|
model.layers.9.mlp.down_proj |
|
model.layers.9.mlp.gate_proj |
|
model.layers.9.mlp.up_proj |
|
model.layers.10.self_attn.k_proj |
|
model.layers.10.self_attn.o_proj |
|
model.layers.10.self_attn.q_proj |
|
model.layers.10.self_attn.v_proj |
|
model.layers.10.mlp.down_proj |
|
model.layers.10.mlp.gate_proj |
|
model.layers.10.mlp.up_proj |
|
model.layers.11.self_attn.k_proj |
|
model.layers.11.self_attn.o_proj |
|
model.layers.11.self_attn.q_proj |
|
model.layers.11.self_attn.v_proj |
|
model.layers.11.mlp.down_proj |
|
model.layers.11.mlp.gate_proj |
|
model.layers.11.mlp.up_proj |
|
model.layers.12.self_attn.k_proj |
|
model.layers.12.self_attn.o_proj |
|
model.layers.12.self_attn.q_proj |
|
model.layers.12.self_attn.v_proj |
|
model.layers.12.mlp.down_proj |
|
model.layers.12.mlp.gate_proj |
|
model.layers.12.mlp.up_proj |
|
model.layers.13.self_attn.k_proj |
|
model.layers.13.self_attn.o_proj |
|
model.layers.13.self_attn.q_proj |
|
model.layers.13.self_attn.v_proj |
|
model.layers.13.mlp.down_proj |
|
model.layers.13.mlp.gate_proj |
|
model.layers.13.mlp.up_proj |
|
model.layers.14.self_attn.k_proj |
|
model.layers.14.self_attn.o_proj |
|
model.layers.14.self_attn.q_proj |
|
model.layers.14.self_attn.v_proj |
|
model.layers.14.mlp.down_proj |
|
model.layers.14.mlp.gate_proj |
|
model.layers.14.mlp.up_proj |
|
model.layers.15.self_attn.k_proj |
|
model.layers.15.self_attn.o_proj |
|
model.layers.15.self_attn.q_proj |
|
model.layers.15.self_attn.v_proj |
|
model.layers.15.mlp.down_proj |
|
model.layers.15.mlp.gate_proj |
|
model.layers.15.mlp.up_proj |
|
model.layers.16.self_attn.k_proj |
|
model.layers.16.self_attn.o_proj |
|
model.layers.16.self_attn.q_proj |
|
model.layers.16.self_attn.v_proj |
|
model.layers.16.mlp.down_proj |
|
model.layers.16.mlp.gate_proj |
|
model.layers.16.mlp.up_proj |
|
model.layers.17.self_attn.k_proj |
|
model.layers.17.self_attn.o_proj |
|
model.layers.17.self_attn.q_proj |
|
model.layers.17.self_attn.v_proj |
|
model.layers.17.mlp.down_proj |
|
model.layers.17.mlp.gate_proj |
|
model.layers.17.mlp.up_proj |
|
model.layers.18.self_attn.k_proj |
|
model.layers.18.self_attn.o_proj |
|
model.layers.18.self_attn.q_proj |
|
model.layers.18.self_attn.v_proj |
|
model.layers.18.mlp.down_proj |
|
model.layers.18.mlp.gate_proj |
|
model.layers.18.mlp.up_proj |
|
model.layers.19.self_attn.k_proj |
|
model.layers.19.self_attn.o_proj |
|
model.layers.19.self_attn.q_proj |
|
model.layers.19.self_attn.v_proj |
|
model.layers.19.mlp.down_proj |
|
model.layers.19.mlp.gate_proj |
|
model.layers.19.mlp.up_proj |
|
model.layers.20.self_attn.k_proj |
|
model.layers.20.self_attn.o_proj |
|
model.layers.20.self_attn.q_proj |
|
model.layers.20.self_attn.v_proj |
|
model.layers.20.mlp.down_proj |
|
model.layers.20.mlp.gate_proj |
|
model.layers.20.mlp.up_proj |
|
model.layers.21.self_attn.k_proj |
|
model.layers.21.self_attn.o_proj |
|
model.layers.21.self_attn.q_proj |
|
model.layers.21.self_attn.v_proj |
|
model.layers.21.mlp.down_proj |
|
model.layers.21.mlp.gate_proj |
|
model.layers.21.mlp.up_proj |
|
model.layers.22.self_attn.k_proj |
|
model.layers.22.self_attn.o_proj |
|
model.layers.22.self_attn.q_proj |
|
model.layers.22.self_attn.v_proj |
|
model.layers.22.mlp.down_proj |
|
model.layers.22.mlp.gate_proj |
|
model.layers.22.mlp.up_proj |
|
model.layers.23.self_attn.k_proj |
|
model.layers.23.self_attn.o_proj |
|
model.layers.23.self_attn.q_proj |
|
model.layers.23.self_attn.v_proj |
|
model.layers.23.mlp.down_proj |
|
model.layers.23.mlp.gate_proj |
|
model.layers.23.mlp.up_proj |
|
model.layers.24.self_attn.k_proj |
|
model.layers.24.self_attn.o_proj |
|
model.layers.24.self_attn.q_proj |
|
model.layers.24.self_attn.v_proj |
|
model.layers.24.mlp.down_proj |
|
model.layers.24.mlp.gate_proj |
|
model.layers.24.mlp.up_proj |
|
model.layers.25.self_attn.k_proj |
|
model.layers.25.self_attn.o_proj |
|
model.layers.25.self_attn.q_proj |
|
model.layers.25.self_attn.v_proj |
|
model.layers.25.mlp.down_proj |
|
model.layers.25.mlp.gate_proj |
|
model.layers.25.mlp.up_proj |
|
model.layers.26.self_attn.k_proj |
|
model.layers.26.self_attn.o_proj |
|
model.layers.26.self_attn.q_proj |
|
model.layers.26.self_attn.v_proj |
|
model.layers.26.mlp.down_proj |
|
model.layers.26.mlp.gate_proj |
|
model.layers.26.mlp.up_proj |
|
model.layers.27.self_attn.k_proj |
|
model.layers.27.self_attn.o_proj |
|
model.layers.27.self_attn.q_proj |
|
model.layers.27.self_attn.v_proj |
|
model.layers.27.mlp.down_proj |
|
model.layers.27.mlp.gate_proj |
|
model.layers.27.mlp.up_proj |
|
model.layers.28.self_attn.k_proj |
|
model.layers.28.self_attn.o_proj |
|
model.layers.28.self_attn.q_proj |
|
model.layers.28.self_attn.v_proj |
|
model.layers.28.mlp.down_proj |
|
model.layers.28.mlp.gate_proj |
|
model.layers.28.mlp.up_proj |
|
model.layers.29.self_attn.k_proj |
|
model.layers.29.self_attn.o_proj |
|
model.layers.29.self_attn.q_proj |
|
model.layers.29.self_attn.v_proj |
|
model.layers.29.mlp.down_proj |
|
model.layers.29.mlp.gate_proj |
|
model.layers.29.mlp.up_proj |
|
model.layers.30.self_attn.k_proj |
|
model.layers.30.self_attn.o_proj |
|
model.layers.30.self_attn.q_proj |
|
model.layers.30.self_attn.v_proj |
|
model.layers.30.mlp.down_proj |
|
model.layers.30.mlp.gate_proj |
|
model.layers.30.mlp.up_proj |
|
model.layers.31.self_attn.k_proj |
|
model.layers.31.self_attn.o_proj |
|
model.layers.31.self_attn.q_proj |
|
model.layers.31.self_attn.v_proj |
|
model.layers.31.mlp.down_proj |
|
model.layers.31.mlp.gate_proj |
|
model.layers.31.mlp.up_proj |
|
model.layers.32.self_attn.k_proj |
|
model.layers.32.self_attn.o_proj |
|
model.layers.32.self_attn.q_proj |
|
model.layers.32.self_attn.v_proj |
|
model.layers.32.mlp.down_proj |
|
model.layers.32.mlp.gate_proj |
|
model.layers.32.mlp.up_proj |
|
model.layers.33.self_attn.k_proj |
|
model.layers.33.self_attn.o_proj |
|
model.layers.33.self_attn.q_proj |
|
model.layers.33.self_attn.v_proj |
|
model.layers.33.mlp.down_proj |
|
model.layers.33.mlp.gate_proj |
|
model.layers.33.mlp.up_proj |
|
model.layers.34.self_attn.k_proj |
|
model.layers.34.self_attn.o_proj |
|
model.layers.34.self_attn.q_proj |
|
model.layers.34.self_attn.v_proj |
|
model.layers.34.mlp.down_proj |
|
model.layers.34.mlp.gate_proj |
|
model.layers.34.mlp.up_proj |
|
model.layers.35.self_attn.k_proj |
|
model.layers.35.self_attn.o_proj |
|
model.layers.35.self_attn.q_proj |
|
model.layers.35.self_attn.v_proj |
|
model.layers.35.mlp.down_proj |
|
model.layers.35.mlp.gate_proj |
|
model.layers.35.mlp.up_proj |
|
model.layers.36.self_attn.k_proj |
|
model.layers.36.self_attn.o_proj |
|
model.layers.36.self_attn.q_proj |
|
model.layers.36.self_attn.v_proj |
|
model.layers.36.mlp.down_proj |
|
model.layers.36.mlp.gate_proj |
|
model.layers.36.mlp.up_proj |
|
model.layers.37.self_attn.k_proj |
|
model.layers.37.self_attn.o_proj |
|
model.layers.37.self_attn.q_proj |
|
model.layers.37.self_attn.v_proj |
|
model.layers.37.mlp.down_proj |
|
model.layers.37.mlp.gate_proj |
|
model.layers.37.mlp.up_proj |
|
model.layers.38.self_attn.k_proj |
|
model.layers.38.self_attn.o_proj |
|
model.layers.38.self_attn.q_proj |
|
model.layers.38.self_attn.v_proj |
|
model.layers.38.mlp.down_proj |
|
model.layers.38.mlp.gate_proj |
|
model.layers.38.mlp.up_proj |
|
model.layers.39.self_attn.k_proj |
|
model.layers.39.self_attn.o_proj |
|
model.layers.39.self_attn.q_proj |
|
model.layers.39.self_attn.v_proj |
|
model.layers.39.mlp.down_proj |
|
model.layers.39.mlp.gate_proj |
|
model.layers.39.mlp.up_proj |
|
model.layers.40.self_attn.k_proj |
|
model.layers.40.self_attn.o_proj |
|
model.layers.40.self_attn.q_proj |
|
model.layers.40.self_attn.v_proj |
|
model.layers.40.mlp.down_proj |
|
model.layers.40.mlp.gate_proj |
|
model.layers.40.mlp.up_proj |
|
model.layers.41.self_attn.k_proj |
|
model.layers.41.self_attn.o_proj |
|
model.layers.41.self_attn.q_proj |
|
model.layers.41.self_attn.v_proj |
|
model.layers.41.mlp.down_proj |
|
model.layers.41.mlp.gate_proj |
|
model.layers.41.mlp.up_proj |
|
model.layers.42.self_attn.k_proj |
|
model.layers.42.self_attn.o_proj |
|
model.layers.42.self_attn.q_proj |
|
model.layers.42.self_attn.v_proj |
|
model.layers.42.mlp.down_proj |
|
model.layers.42.mlp.gate_proj |
|
model.layers.42.mlp.up_proj |
|
model.layers.43.self_attn.k_proj |
|
model.layers.43.self_attn.o_proj |
|
model.layers.43.self_attn.q_proj |
|
model.layers.43.self_attn.v_proj |
|
model.layers.43.mlp.down_proj |
|
model.layers.43.mlp.gate_proj |
|
model.layers.43.mlp.up_proj |
|
model.layers.44.self_attn.k_proj |
|
model.layers.44.self_attn.o_proj |
|
model.layers.44.self_attn.q_proj |
|
model.layers.44.self_attn.v_proj |
|
model.layers.44.mlp.down_proj |
|
model.layers.44.mlp.gate_proj |
|
model.layers.44.mlp.up_proj |
|
model.layers.45.self_attn.k_proj |
|
model.layers.45.self_attn.o_proj |
|
model.layers.45.self_attn.q_proj |
|
model.layers.45.self_attn.v_proj |
|
model.layers.45.mlp.down_proj |
|
model.layers.45.mlp.gate_proj |
|
model.layers.45.mlp.up_proj |
|
model.layers.46.self_attn.k_proj |
|
model.layers.46.self_attn.o_proj |
|
model.layers.46.self_attn.q_proj |
|
model.layers.46.self_attn.v_proj |
|
model.layers.46.mlp.down_proj |
|
model.layers.46.mlp.gate_proj |
|
model.layers.46.mlp.up_proj |
|
model.layers.47.self_attn.k_proj |
|
model.layers.47.self_attn.o_proj |
|
model.layers.47.self_attn.q_proj |
|
model.layers.47.self_attn.v_proj |
|
model.layers.47.mlp.down_proj |
|
model.layers.47.mlp.gate_proj |
|
model.layers.47.mlp.up_proj |
|
model.layers.48.self_attn.k_proj |
|
model.layers.48.self_attn.o_proj |
|
model.layers.48.self_attn.q_proj |
|
model.layers.48.self_attn.v_proj |
|
model.layers.48.mlp.down_proj |
|
model.layers.48.mlp.gate_proj |
|
model.layers.48.mlp.up_proj |
|
model.layers.49.self_attn.k_proj |
|
model.layers.49.self_attn.o_proj |
|
model.layers.49.self_attn.q_proj |
|
model.layers.49.self_attn.v_proj |
|
model.layers.49.mlp.down_proj |
|
model.layers.49.mlp.gate_proj |
|
model.layers.49.mlp.up_proj |
|
model.layers.50.self_attn.k_proj |
|
model.layers.50.self_attn.o_proj |
|
model.layers.50.self_attn.q_proj |
|
model.layers.50.self_attn.v_proj |
|
model.layers.50.mlp.down_proj |
|
model.layers.50.mlp.gate_proj |
|
model.layers.50.mlp.up_proj |
|
model.layers.51.self_attn.k_proj |
|
model.layers.51.self_attn.o_proj |
|
model.layers.51.self_attn.q_proj |
|
model.layers.51.self_attn.v_proj |
|
model.layers.51.mlp.down_proj |
|
model.layers.51.mlp.gate_proj |
|
model.layers.51.mlp.up_proj |
|
model.layers.52.self_attn.k_proj |
|
model.layers.52.self_attn.o_proj |
|
model.layers.52.self_attn.q_proj |
|
model.layers.52.self_attn.v_proj |
|
model.layers.52.mlp.down_proj |
|
model.layers.52.mlp.gate_proj |
|
model.layers.52.mlp.up_proj |
|
model.layers.53.self_attn.k_proj |
|
model.layers.53.self_attn.o_proj |
|
model.layers.53.self_attn.q_proj |
|
model.layers.53.self_attn.v_proj |
|
model.layers.53.mlp.down_proj |
|
model.layers.53.mlp.gate_proj |
|
model.layers.53.mlp.up_proj |
|
model.layers.54.self_attn.k_proj |
|
model.layers.54.self_attn.o_proj |
|
model.layers.54.self_attn.q_proj |
|
model.layers.54.self_attn.v_proj |
|
model.layers.54.mlp.down_proj |
|
model.layers.54.mlp.gate_proj |
|
model.layers.54.mlp.up_proj |
|
model.layers.55.self_attn.k_proj |
|
model.layers.55.self_attn.o_proj |
|
model.layers.55.self_attn.q_proj |
|
model.layers.55.self_attn.v_proj |
|
model.layers.55.mlp.down_proj |
|
model.layers.55.mlp.gate_proj |
|
model.layers.55.mlp.up_proj |
|
model.layers.56.self_attn.k_proj |
|
model.layers.56.self_attn.o_proj |
|
model.layers.56.self_attn.q_proj |
|
model.layers.56.self_attn.v_proj |
|
model.layers.56.mlp.down_proj |
|
model.layers.56.mlp.gate_proj |
|
model.layers.56.mlp.up_proj |
|
model.layers.57.self_attn.k_proj |
|
model.layers.57.self_attn.o_proj |
|
model.layers.57.self_attn.q_proj |
|
model.layers.57.self_attn.v_proj |
|
model.layers.57.mlp.down_proj |
|
model.layers.57.mlp.gate_proj |
|
model.layers.57.mlp.up_proj |
|
model.layers.58.self_attn.k_proj |
|
model.layers.58.self_attn.o_proj |
|
model.layers.58.self_attn.q_proj |
|
model.layers.58.self_attn.v_proj |
|
model.layers.58.mlp.down_proj |
|
model.layers.58.mlp.gate_proj |
|
model.layers.58.mlp.up_proj |
|
model.layers.59.self_attn.k_proj |
|
model.layers.59.self_attn.o_proj |
|
model.layers.59.self_attn.q_proj |
|
model.layers.59.self_attn.v_proj |
|
model.layers.59.mlp.down_proj |
|
model.layers.59.mlp.gate_proj |
|
model.layers.59.mlp.up_proj |
|
Done. |
|
|