LRL
committed on
Commit
•
6f285fe
1
Parent(s):
52ab5b1
Init Model
Browse files
- cal_data_size.txt +7 -0
- config.json +45 -0
- model.safetensors +3 -0
- quant_log.json +1 -0
- quantize_config.json +16 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
cal_data_size.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ptb:0
|
2 |
+
wikitext2:1024
|
3 |
+
grammar:0
|
4 |
+
dict:0
|
5 |
+
sentiment:0
|
6 |
+
detection:0
|
7 |
+
rate:0
|
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/monster/data/model/Yi-1.5-34B",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 7168,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 20480,
|
14 |
+
"max_position_embeddings": 4096,
|
15 |
+
"model_type": "llama",
|
16 |
+
"num_attention_heads": 56,
|
17 |
+
"num_hidden_layers": 60,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"pretraining_tp": 1,
|
21 |
+
"quantization_config": {
|
22 |
+
"bits": 4,
|
23 |
+
"checkpoint_format": "gptq",
|
24 |
+
"damp_percent": 0.005,
|
25 |
+
"desc_act": false,
|
26 |
+
"group_size": 128,
|
27 |
+
"meta": {
|
28 |
+
"quantizer": "autogptq:0.8.0.dev1"
|
29 |
+
},
|
30 |
+
"model_file_base_name": null,
|
31 |
+
"model_name_or_path": null,
|
32 |
+
"quant_method": "gptq",
|
33 |
+
"static_groups": false,
|
34 |
+
"sym": false,
|
35 |
+
"true_sequential": true
|
36 |
+
},
|
37 |
+
"rms_norm_eps": 1e-06,
|
38 |
+
"rope_scaling": null,
|
39 |
+
"rope_theta": 5000000.0,
|
40 |
+
"tie_word_embeddings": false,
|
41 |
+
"torch_dtype": "float16",
|
42 |
+
"transformers_version": "4.40.2",
|
43 |
+
"use_cache": true,
|
44 |
+
"vocab_size": 64000
|
45 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:714fd312e9e3ede2db2cb9c5f760a7e6a03d00bd32a56a682fe5be7b3ddb4758
|
3 |
+
size 19241180272
|
quant_log.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"layer": 1, "module": "self_attn.k_proj", "avg_loss": 0.019579052925109863, "time": 2.0083673000335693}, {"layer": 1, "module": "self_attn.v_proj", "avg_loss": 0.0004904228262603283, "time": 1.9191021919250488}, {"layer": 1, "module": "self_attn.q_proj", "avg_loss": 0.016233358532190323, "time": 1.8614857196807861}, {"layer": 1, "module": "self_attn.o_proj", "avg_loss": 3.1112802389543504e-05, "time": 1.839848518371582}, {"layer": 1, "module": "mlp.up_proj", "avg_loss": 0.0028566778637468815, "time": 2.2973148822784424}, {"layer": 1, "module": "mlp.gate_proj", "avg_loss": 0.002913618693128228, "time": 2.2560112476348877}, {"layer": 1, "module": "mlp.down_proj", "avg_loss": 1.8501212252886035e-05, "time": 6.2284095287323}, {"layer": 2, "module": "self_attn.k_proj", "avg_loss": 0.003291596192866564, "time": 1.8798041343688965}, {"layer": 2, "module": "self_attn.v_proj", "avg_loss": 0.00012713002797681838, "time": 1.8013911247253418}, {"layer": 2, "module": "self_attn.q_proj", "avg_loss": 0.005386114586144686, "time": 1.8850772380828857}, {"layer": 2, "module": "self_attn.o_proj", "avg_loss": 2.761085852398537e-06, "time": 1.8490169048309326}, {"layer": 2, "module": "mlp.up_proj", "avg_loss": 0.005018490832298994, "time": 1.903074026107788}, {"layer": 2, "module": "mlp.gate_proj", "avg_loss": 0.005198080092668533, "time": 1.911665916442871}, {"layer": 2, "module": "mlp.down_proj", "avg_loss": 0.00010520378418732435, "time": 6.078613042831421}, {"layer": 3, "module": "self_attn.k_proj", "avg_loss": 0.0020864347461611032, "time": 1.864649772644043}, {"layer": 3, "module": "self_attn.v_proj", "avg_loss": 0.00032892581657506526, "time": 1.939770221710205}, {"layer": 3, "module": "self_attn.q_proj", "avg_loss": 0.0045464495196938515, "time": 1.8356659412384033}, {"layer": 3, "module": "self_attn.o_proj", "avg_loss": 4.8812958993948996e-05, "time": 1.9104349613189697}, {"layer": 3, "module": "mlp.up_proj", "avg_loss": 0.01100834459066391, "time": 1.9521610736846924}, 
{"layer": 3, "module": "mlp.gate_proj", "avg_loss": 0.0110942916944623, "time": 2.29156494140625}, {"layer": 3, "module": "mlp.down_proj", "avg_loss": 1.7225898504257202, "time": 6.056393384933472}, {"layer": 4, "module": "self_attn.k_proj", "avg_loss": 0.008318766951560974, "time": 1.8064334392547607}, {"layer": 4, "module": "self_attn.v_proj", "avg_loss": 0.0018323739059269428, "time": 1.803293228149414}, {"layer": 4, "module": "self_attn.q_proj", "avg_loss": 0.023362228646874428, "time": 1.8234286308288574}, {"layer": 4, "module": "self_attn.o_proj", "avg_loss": 0.0003202600928489119, "time": 1.9684116840362549}, {"layer": 4, "module": "mlp.up_proj", "avg_loss": 0.02653498202562332, "time": 1.9100878238677979}, {"layer": 4, "module": "mlp.gate_proj", "avg_loss": 0.026300273835659027, "time": 1.929164171218872}, {"layer": 4, "module": "mlp.down_proj", "avg_loss": 0.0006237772759050131, "time": 6.098562717437744}, {"layer": 5, "module": "self_attn.k_proj", "avg_loss": 0.009311821311712265, "time": 1.7996103763580322}, {"layer": 5, "module": "self_attn.v_proj", "avg_loss": 0.0035971221514046192, "time": 1.765134334564209}, {"layer": 5, "module": "self_attn.q_proj", "avg_loss": 0.024823933839797974, "time": 1.8353805541992188}, {"layer": 5, "module": "self_attn.o_proj", "avg_loss": 9.472907549934462e-05, "time": 1.7983314990997314}, {"layer": 5, "module": "mlp.up_proj", "avg_loss": 0.0551939457654953, "time": 1.9428186416625977}, {"layer": 5, "module": "mlp.gate_proj", "avg_loss": 0.055896107107400894, "time": 1.9101002216339111}, {"layer": 5, "module": "mlp.down_proj", "avg_loss": 0.0009531793766655028, "time": 6.319254636764526}, {"layer": 6, "module": "self_attn.k_proj", "avg_loss": 0.010899942368268967, "time": 1.8600893020629883}, {"layer": 6, "module": "self_attn.v_proj", "avg_loss": 0.003992087207734585, "time": 1.9119908809661865}, {"layer": 6, "module": "self_attn.q_proj", "avg_loss": 0.030845575034618378, "time": 1.875746250152588}, {"layer": 6, "module": 
"self_attn.o_proj", "avg_loss": 0.00033230643020942807, "time": 2.0527100563049316}, {"layer": 6, "module": "mlp.up_proj", "avg_loss": 0.07704982161521912, "time": 1.9758269786834717}, {"layer": 6, "module": "mlp.gate_proj", "avg_loss": 0.07865334302186966, "time": 2.102125644683838}, {"layer": 6, "module": "mlp.down_proj", "avg_loss": 0.00133964279666543, "time": 6.273864507675171}, {"layer": 7, "module": "self_attn.k_proj", "avg_loss": 0.03063431940972805, "time": 1.8997118473052979}, {"layer": 7, "module": "self_attn.v_proj", "avg_loss": 0.008891120553016663, "time": 1.8340034484863281}, {"layer": 7, "module": "self_attn.q_proj", "avg_loss": 0.09211224317550659, "time": 1.9565913677215576}, {"layer": 7, "module": "self_attn.o_proj", "avg_loss": 0.0010163704864680767, "time": 2.3646399974823}, {"layer": 7, "module": "mlp.up_proj", "avg_loss": 0.117058165371418, "time": 1.9330379962921143}, {"layer": 7, "module": "mlp.gate_proj", "avg_loss": 0.1199302077293396, "time": 1.946601390838623}, {"layer": 7, "module": "mlp.down_proj", "avg_loss": 0.002333350945264101, "time": 6.185033321380615}, {"layer": 8, "module": "self_attn.k_proj", "avg_loss": 0.03621470183134079, "time": 1.8614501953125}, {"layer": 8, "module": "self_attn.v_proj", "avg_loss": 0.00962606817483902, "time": 2.2438266277313232}, {"layer": 8, "module": "self_attn.q_proj", "avg_loss": 0.11330453306436539, "time": 1.8301668167114258}, {"layer": 8, "module": "self_attn.o_proj", "avg_loss": 0.0012851323699578643, "time": 1.85626220703125}, {"layer": 8, "module": "mlp.up_proj", "avg_loss": 0.16931866109371185, "time": 1.942215919494629}, {"layer": 8, "module": "mlp.gate_proj", "avg_loss": 0.17358526587486267, "time": 2.1364028453826904}, {"layer": 8, "module": "mlp.down_proj", "avg_loss": 0.003487337613478303, "time": 5.899571895599365}, {"layer": 9, "module": "self_attn.k_proj", "avg_loss": 0.032503508031368256, "time": 1.845473051071167}, {"layer": 9, "module": "self_attn.v_proj", "avg_loss": 
0.01240577083081007, "time": 1.8250155448913574}, {"layer": 9, "module": "self_attn.q_proj", "avg_loss": 0.09981885552406311, "time": 1.8884878158569336}, {"layer": 9, "module": "self_attn.o_proj", "avg_loss": 0.0006247044075280428, "time": 1.8844690322875977}, {"layer": 9, "module": "mlp.up_proj", "avg_loss": 0.2173147052526474, "time": 2.0000007152557373}, {"layer": 9, "module": "mlp.gate_proj", "avg_loss": 0.223751962184906, "time": 1.9875388145446777}, {"layer": 9, "module": "mlp.down_proj", "avg_loss": 0.004511426202952862, "time": 6.448248863220215}, {"layer": 10, "module": "self_attn.k_proj", "avg_loss": 0.05721554532647133, "time": 1.9054145812988281}, {"layer": 10, "module": "self_attn.v_proj", "avg_loss": 0.01885191537439823, "time": 1.8317420482635498}, {"layer": 10, "module": "self_attn.q_proj", "avg_loss": 0.1978895664215088, "time": 1.9429364204406738}, {"layer": 10, "module": "self_attn.o_proj", "avg_loss": 0.0013331378577277064, "time": 1.8540480136871338}, {"layer": 10, "module": "mlp.up_proj", "avg_loss": 0.2672441899776459, "time": 1.9351160526275635}, {"layer": 10, "module": "mlp.gate_proj", "avg_loss": 0.27654844522476196, "time": 2.0162513256073}, {"layer": 10, "module": "mlp.down_proj", "avg_loss": 0.006315316539257765, "time": 5.981189250946045}, {"layer": 11, "module": "self_attn.k_proj", "avg_loss": 0.06662096083164215, "time": 1.8655681610107422}, {"layer": 11, "module": "self_attn.v_proj", "avg_loss": 0.021019387990236282, "time": 1.838855266571045}, {"layer": 11, "module": "self_attn.q_proj", "avg_loss": 0.23617760837078094, "time": 1.9237422943115234}, {"layer": 11, "module": "self_attn.o_proj", "avg_loss": 0.0013279514387249947, "time": 1.8465075492858887}, {"layer": 11, "module": "mlp.up_proj", "avg_loss": 0.28860437870025635, "time": 1.9319384098052979}, {"layer": 11, "module": "mlp.gate_proj", "avg_loss": 0.29797857999801636, "time": 2.0106923580169678}, {"layer": 11, "module": "mlp.down_proj", "avg_loss": 0.007748777978122234, 
"time": 6.142757415771484}, {"layer": 12, "module": "self_attn.k_proj", "avg_loss": 0.1174224391579628, "time": 1.958937168121338}, {"layer": 12, "module": "self_attn.v_proj", "avg_loss": 0.03564442694187164, "time": 1.8783559799194336}, {"layer": 12, "module": "self_attn.q_proj", "avg_loss": 0.4557289481163025, "time": 1.877861499786377}, {"layer": 12, "module": "self_attn.o_proj", "avg_loss": 0.002993631409481168, "time": 1.8766658306121826}, {"layer": 12, "module": "mlp.up_proj", "avg_loss": 0.3671264052391052, "time": 1.9092583656311035}, {"layer": 12, "module": "mlp.gate_proj", "avg_loss": 0.37879347801208496, "time": 2.272951126098633}, {"layer": 12, "module": "mlp.down_proj", "avg_loss": 0.010314248502254486, "time": 6.012053728103638}, {"layer": 13, "module": "self_attn.k_proj", "avg_loss": 0.11947764456272125, "time": 1.795612096786499}, {"layer": 13, "module": "self_attn.v_proj", "avg_loss": 0.04001478850841522, "time": 1.8556225299835205}, {"layer": 13, "module": "self_attn.q_proj", "avg_loss": 0.47955411672592163, "time": 1.8338000774383545}, {"layer": 13, "module": "self_attn.o_proj", "avg_loss": 0.0028072057757526636, "time": 1.9405372142791748}, {"layer": 13, "module": "mlp.up_proj", "avg_loss": 0.4376929998397827, "time": 1.9934642314910889}, {"layer": 13, "module": "mlp.gate_proj", "avg_loss": 0.45184576511383057, "time": 1.9674344062805176}, {"layer": 13, "module": "mlp.down_proj", "avg_loss": 0.012834171764552593, "time": 6.270284414291382}, {"layer": 14, "module": "self_attn.k_proj", "avg_loss": 0.10175293684005737, "time": 1.872243881225586}, {"layer": 14, "module": "self_attn.v_proj", "avg_loss": 0.04228831082582474, "time": 1.8770229816436768}, {"layer": 14, "module": "self_attn.q_proj", "avg_loss": 0.40688544511795044, "time": 2.2439467906951904}, {"layer": 14, "module": "self_attn.o_proj", "avg_loss": 0.0020741946063935757, "time": 1.8847260475158691}, {"layer": 14, "module": "mlp.up_proj", "avg_loss": 0.5144519805908203, "time": 
2.0803046226501465}, {"layer": 14, "module": "mlp.gate_proj", "avg_loss": 0.534722089767456, "time": 2.16898512840271}, {"layer": 14, "module": "mlp.down_proj", "avg_loss": 0.01542795728892088, "time": 6.191243410110474}, {"layer": 15, "module": "self_attn.k_proj", "avg_loss": 0.13935965299606323, "time": 1.811086893081665}, {"layer": 15, "module": "self_attn.v_proj", "avg_loss": 0.05044064670801163, "time": 1.847536325454712}, {"layer": 15, "module": "self_attn.q_proj", "avg_loss": 0.5608706474304199, "time": 1.8485565185546875}, {"layer": 15, "module": "self_attn.o_proj", "avg_loss": 0.002558318432420492, "time": 1.8890457153320312}, {"layer": 15, "module": "mlp.up_proj", "avg_loss": 0.595039427280426, "time": 2.03692889213562}, {"layer": 15, "module": "mlp.gate_proj", "avg_loss": 0.6213509440422058, "time": 1.968945026397705}, {"layer": 15, "module": "mlp.down_proj", "avg_loss": 0.01809941977262497, "time": 6.200168609619141}, {"layer": 16, "module": "self_attn.k_proj", "avg_loss": 0.11943342536687851, "time": 1.82584547996521}, {"layer": 16, "module": "self_attn.v_proj", "avg_loss": 0.046300239861011505, "time": 1.9623675346374512}, {"layer": 16, "module": "self_attn.q_proj", "avg_loss": 0.46233880519866943, "time": 1.9820945262908936}, {"layer": 16, "module": "self_attn.o_proj", "avg_loss": 0.003362769726663828, "time": 1.9416303634643555}, {"layer": 16, "module": "mlp.up_proj", "avg_loss": 0.6689113974571228, "time": 2.058560371398926}, {"layer": 16, "module": "mlp.gate_proj", "avg_loss": 0.7017173171043396, "time": 1.9888412952423096}, {"layer": 16, "module": "mlp.down_proj", "avg_loss": 0.021423624828457832, "time": 5.915239572525024}, {"layer": 17, "module": "self_attn.k_proj", "avg_loss": 0.1603582203388214, "time": 1.8014087677001953}, {"layer": 17, "module": "self_attn.v_proj", "avg_loss": 0.05247320979833603, "time": 1.8623056411743164}, {"layer": 17, "module": "self_attn.q_proj", "avg_loss": 0.6341845989227295, "time": 1.8633792400360107}, {"layer": 
17, "module": "self_attn.o_proj", "avg_loss": 0.004496422596275806, "time": 1.9475469589233398}, {"layer": 17, "module": "mlp.up_proj", "avg_loss": 0.7134124636650085, "time": 1.9754440784454346}, {"layer": 17, "module": "mlp.gate_proj", "avg_loss": 0.7670619487762451, "time": 1.9561679363250732}, {"layer": 17, "module": "mlp.down_proj", "avg_loss": 0.02442336082458496, "time": 6.075518608093262}, {"layer": 18, "module": "self_attn.k_proj", "avg_loss": 0.15324002504348755, "time": 1.8911101818084717}, {"layer": 18, "module": "self_attn.v_proj", "avg_loss": 0.06279868632555008, "time": 1.958611249923706}, {"layer": 18, "module": "self_attn.q_proj", "avg_loss": 0.6743718981742859, "time": 1.8994636535644531}, {"layer": 18, "module": "self_attn.o_proj", "avg_loss": 0.005486147478222847, "time": 1.9295859336853027}, {"layer": 18, "module": "mlp.up_proj", "avg_loss": 0.801851212978363, "time": 1.9502496719360352}, {"layer": 18, "module": "mlp.gate_proj", "avg_loss": 0.8530661463737488, "time": 1.950526475906372}, {"layer": 18, "module": "mlp.down_proj", "avg_loss": 0.029648764058947563, "time": 6.1071813106536865}, {"layer": 19, "module": "self_attn.k_proj", "avg_loss": 0.15967495739459991, "time": 1.8719277381896973}, {"layer": 19, "module": "self_attn.v_proj", "avg_loss": 0.06581241637468338, "time": 1.8246827125549316}, {"layer": 19, "module": "self_attn.q_proj", "avg_loss": 0.6935086250305176, "time": 1.9207665920257568}, {"layer": 19, "module": "self_attn.o_proj", "avg_loss": 0.00739098759368062, "time": 1.8432002067565918}, {"layer": 19, "module": "mlp.up_proj", "avg_loss": 0.9435623288154602, "time": 1.9409887790679932}, {"layer": 19, "module": "mlp.gate_proj", "avg_loss": 1.012582778930664, "time": 1.9592344760894775}, {"layer": 19, "module": "mlp.down_proj", "avg_loss": 0.03968477621674538, "time": 6.0437233448028564}, {"layer": 20, "module": "self_attn.k_proj", "avg_loss": 0.16439256072044373, "time": 2.018282890319824}, {"layer": 20, "module": 
"self_attn.v_proj", "avg_loss": 0.08629634976387024, "time": 1.8479111194610596}, {"layer": 20, "module": "self_attn.q_proj", "avg_loss": 0.7864455580711365, "time": 1.8896095752716064}, {"layer": 20, "module": "self_attn.o_proj", "avg_loss": 0.007046117447316647, "time": 1.8459668159484863}, {"layer": 20, "module": "mlp.up_proj", "avg_loss": 1.07151198387146, "time": 2.140672206878662}, {"layer": 20, "module": "mlp.gate_proj", "avg_loss": 1.1552679538726807, "time": 2.0155086517333984}, {"layer": 20, "module": "mlp.down_proj", "avg_loss": 0.055621981620788574, "time": 6.057605504989624}, {"layer": 21, "module": "self_attn.k_proj", "avg_loss": 0.22984415292739868, "time": 1.824413776397705}, {"layer": 21, "module": "self_attn.v_proj", "avg_loss": 0.09681503474712372, "time": 1.7901756763458252}, {"layer": 21, "module": "self_attn.q_proj", "avg_loss": 1.094360113143921, "time": 1.8557202816009521}, {"layer": 21, "module": "self_attn.o_proj", "avg_loss": 0.008852583356201649, "time": 1.859896183013916}, {"layer": 21, "module": "mlp.up_proj", "avg_loss": 1.2860665321350098, "time": 1.9599997997283936}, {"layer": 21, "module": "mlp.gate_proj", "avg_loss": 1.437204122543335, "time": 2.00365948677063}, {"layer": 21, "module": "mlp.down_proj", "avg_loss": 0.2200765609741211, "time": 6.15300726890564}, {"layer": 22, "module": "self_attn.k_proj", "avg_loss": 0.21972990036010742, "time": 1.8494553565979004}, {"layer": 22, "module": "self_attn.v_proj", "avg_loss": 0.0790078192949295, "time": 1.839836835861206}, {"layer": 22, "module": "self_attn.q_proj", "avg_loss": 0.974842369556427, "time": 1.8952538967132568}, {"layer": 22, "module": "self_attn.o_proj", "avg_loss": 0.01374007947742939, "time": 1.8831238746643066}, {"layer": 22, "module": "mlp.up_proj", "avg_loss": 1.3790041208267212, "time": 1.9914546012878418}, {"layer": 22, "module": "mlp.gate_proj", "avg_loss": 1.4970526695251465, "time": 1.9913051128387451}, {"layer": 22, "module": "mlp.down_proj", "avg_loss": 
0.09189730882644653, "time": 6.398738145828247}, {"layer": 23, "module": "self_attn.k_proj", "avg_loss": 0.21037712693214417, "time": 1.8325083255767822}, {"layer": 23, "module": "self_attn.v_proj", "avg_loss": 0.07356338202953339, "time": 1.9076004028320312}, {"layer": 23, "module": "self_attn.q_proj", "avg_loss": 0.9240167140960693, "time": 1.9398488998413086}, {"layer": 23, "module": "self_attn.o_proj", "avg_loss": 0.020356642082333565, "time": 1.9087605476379395}, {"layer": 23, "module": "mlp.up_proj", "avg_loss": 1.5686827898025513, "time": 2.1683661937713623}, {"layer": 23, "module": "mlp.gate_proj", "avg_loss": 1.7221038341522217, "time": 2.005326986312866}, {"layer": 23, "module": "mlp.down_proj", "avg_loss": 0.1993727833032608, "time": 6.028310775756836}, {"layer": 24, "module": "self_attn.k_proj", "avg_loss": 0.2188391238451004, "time": 1.8556897640228271}, {"layer": 24, "module": "self_attn.v_proj", "avg_loss": 0.06599003076553345, "time": 1.9174540042877197}, {"layer": 24, "module": "self_attn.q_proj", "avg_loss": 0.8806427121162415, "time": 1.875502347946167}, {"layer": 24, "module": "self_attn.o_proj", "avg_loss": 0.021921571344137192, "time": 1.936455249786377}, {"layer": 24, "module": "mlp.up_proj", "avg_loss": 1.6316176652908325, "time": 1.9695947170257568}, {"layer": 24, "module": "mlp.gate_proj", "avg_loss": 1.7000706195831299, "time": 1.9819071292877197}, {"layer": 24, "module": "mlp.down_proj", "avg_loss": 0.12996907532215118, "time": 6.018698215484619}, {"layer": 25, "module": "self_attn.k_proj", "avg_loss": 0.25514405965805054, "time": 1.9205126762390137}, {"layer": 25, "module": "self_attn.v_proj", "avg_loss": 0.08992880582809448, "time": 1.9618756771087646}, {"layer": 25, "module": "self_attn.q_proj", "avg_loss": 1.2162957191467285, "time": 1.9778382778167725}, {"layer": 25, "module": "self_attn.o_proj", "avg_loss": 0.02724221721291542, "time": 2.11969256401062}, {"layer": 25, "module": "mlp.up_proj", "avg_loss": 1.7990005016326904, "time": 
2.0933282375335693}, {"layer": 25, "module": "mlp.gate_proj", "avg_loss": 1.8395330905914307, "time": 2.227503776550293}, {"layer": 25, "module": "mlp.down_proj", "avg_loss": 0.14658531546592712, "time": 5.958787679672241}, {"layer": 26, "module": "self_attn.k_proj", "avg_loss": 0.2814795970916748, "time": 1.8456754684448242}, {"layer": 26, "module": "self_attn.v_proj", "avg_loss": 0.09002552926540375, "time": 1.8206987380981445}, {"layer": 26, "module": "self_attn.q_proj", "avg_loss": 1.2340577840805054, "time": 1.8324074745178223}, {"layer": 26, "module": "self_attn.o_proj", "avg_loss": 0.04394623264670372, "time": 2.1004765033721924}, {"layer": 26, "module": "mlp.up_proj", "avg_loss": 1.916243314743042, "time": 2.0181593894958496}, {"layer": 26, "module": "mlp.gate_proj", "avg_loss": 1.9283432960510254, "time": 1.9858996868133545}, {"layer": 26, "module": "mlp.down_proj", "avg_loss": 0.17569765448570251, "time": 6.301235198974609}, {"layer": 27, "module": "self_attn.k_proj", "avg_loss": 0.25511568784713745, "time": 1.947096586227417}, {"layer": 27, "module": "self_attn.v_proj", "avg_loss": 0.08556636422872543, "time": 1.9335906505584717}, {"layer": 27, "module": "self_attn.q_proj", "avg_loss": 1.1512045860290527, "time": 1.8659117221832275}, {"layer": 27, "module": "self_attn.o_proj", "avg_loss": 0.05419450253248215, "time": 1.8390471935272217}, {"layer": 27, "module": "mlp.up_proj", "avg_loss": 1.9780595302581787, "time": 1.9581494331359863}, {"layer": 27, "module": "mlp.gate_proj", "avg_loss": 1.9184248447418213, "time": 1.9432156085968018}, {"layer": 27, "module": "mlp.down_proj", "avg_loss": 0.19158539175987244, "time": 6.015784740447998}, {"layer": 28, "module": "self_attn.k_proj", "avg_loss": 0.3091006278991699, "time": 1.929377794265747}, {"layer": 28, "module": "self_attn.v_proj", "avg_loss": 0.09270354360342026, "time": 1.8810112476348877}, {"layer": 28, "module": "self_attn.q_proj", "avg_loss": 1.4435489177703857, "time": 1.932102918624878}, {"layer": 
28, "module": "self_attn.o_proj", "avg_loss": 0.04865545406937599, "time": 1.9360723495483398}, {"layer": 28, "module": "mlp.up_proj", "avg_loss": 2.206076145172119, "time": 2.083110809326172}, {"layer": 28, "module": "mlp.gate_proj", "avg_loss": 2.122713088989258, "time": 2.026146650314331}, {"layer": 28, "module": "mlp.down_proj", "avg_loss": 0.2201194316148758, "time": 5.9315855503082275}, {"layer": 29, "module": "self_attn.k_proj", "avg_loss": 0.3013753294944763, "time": 1.902144432067871}, {"layer": 29, "module": "self_attn.v_proj", "avg_loss": 0.11456675827503204, "time": 1.830085277557373}, {"layer": 29, "module": "self_attn.q_proj", "avg_loss": 1.5042316913604736, "time": 1.956463098526001}, {"layer": 29, "module": "self_attn.o_proj", "avg_loss": 0.06087234616279602, "time": 1.8596625328063965}, {"layer": 29, "module": "mlp.up_proj", "avg_loss": 2.3495335578918457, "time": 1.9566035270690918}, {"layer": 29, "module": "mlp.gate_proj", "avg_loss": 2.2288923263549805, "time": 2.0151147842407227}, {"layer": 29, "module": "mlp.down_proj", "avg_loss": 0.2539346218109131, "time": 6.084490537643433}, {"layer": 30, "module": "self_attn.k_proj", "avg_loss": 0.3261649012565613, "time": 1.8259172439575195}, {"layer": 30, "module": "self_attn.v_proj", "avg_loss": 0.14778554439544678, "time": 1.9072704315185547}, {"layer": 30, "module": "self_attn.q_proj", "avg_loss": 1.701836109161377, "time": 1.8653507232666016}, {"layer": 30, "module": "self_attn.o_proj", "avg_loss": 0.06606796383857727, "time": 1.9185779094696045}, {"layer": 30, "module": "mlp.up_proj", "avg_loss": 2.43503999710083, "time": 1.9371919631958008}, {"layer": 30, "module": "mlp.gate_proj", "avg_loss": 2.248983383178711, "time": 1.9929819107055664}, {"layer": 30, "module": "mlp.down_proj", "avg_loss": 0.2812112271785736, "time": 6.073422908782959}, {"layer": 31, "module": "self_attn.k_proj", "avg_loss": 0.2864483594894409, "time": 2.3445842266082764}, {"layer": 31, "module": "self_attn.v_proj", "avg_loss": 
0.14119172096252441, "time": 1.8298423290252686}, {"layer": 31, "module": "self_attn.q_proj", "avg_loss": 1.4614856243133545, "time": 1.8383042812347412}, {"layer": 31, "module": "self_attn.o_proj", "avg_loss": 0.08031009882688522, "time": 1.836122751235962}, {"layer": 31, "module": "mlp.up_proj", "avg_loss": 2.5798587799072266, "time": 1.9966380596160889}, {"layer": 31, "module": "mlp.gate_proj", "avg_loss": 2.382880210876465, "time": 1.9340355396270752}, {"layer": 31, "module": "mlp.down_proj", "avg_loss": 0.3076825737953186, "time": 6.061032772064209}, {"layer": 32, "module": "self_attn.k_proj", "avg_loss": 0.3009835183620453, "time": 1.8235316276550293}, {"layer": 32, "module": "self_attn.v_proj", "avg_loss": 0.15944476425647736, "time": 1.849961519241333}, {"layer": 32, "module": "self_attn.q_proj", "avg_loss": 1.568066120147705, "time": 1.8282287120819092}, {"layer": 32, "module": "self_attn.o_proj", "avg_loss": 0.09764911234378815, "time": 1.9372477531433105}, {"layer": 32, "module": "mlp.up_proj", "avg_loss": 2.616942882537842, "time": 1.9276680946350098}, {"layer": 32, "module": "mlp.gate_proj", "avg_loss": 2.401637315750122, "time": 1.9807047843933105}, {"layer": 32, "module": "mlp.down_proj", "avg_loss": 0.3397749066352844, "time": 6.460233688354492}, {"layer": 33, "module": "self_attn.k_proj", "avg_loss": 0.3384885787963867, "time": 1.8380160331726074}, {"layer": 33, "module": "self_attn.v_proj", "avg_loss": 0.17726179957389832, "time": 1.8083081245422363}, {"layer": 33, "module": "self_attn.q_proj", "avg_loss": 1.8356491327285767, "time": 1.9286725521087646}, {"layer": 33, "module": "self_attn.o_proj", "avg_loss": 0.1059388518333435, "time": 1.8920135498046875}, {"layer": 33, "module": "mlp.up_proj", "avg_loss": 2.8450889587402344, "time": 2.006376028060913}, {"layer": 33, "module": "mlp.gate_proj", "avg_loss": 2.590557098388672, "time": 2.0073537826538086}, {"layer": 33, "module": "mlp.down_proj", "avg_loss": 0.38994789123535156, "time": 
5.986565828323364}, {"layer": 34, "module": "self_attn.k_proj", "avg_loss": 0.3046690821647644, "time": 1.904970407485962}, {"layer": 34, "module": "self_attn.v_proj", "avg_loss": 0.2030366063117981, "time": 1.8549323081970215}, {"layer": 34, "module": "self_attn.q_proj", "avg_loss": 1.7143616676330566, "time": 1.91471266746521}, {"layer": 34, "module": "self_attn.o_proj", "avg_loss": 0.1300622671842575, "time": 1.8840491771697998}, {"layer": 34, "module": "mlp.up_proj", "avg_loss": 3.060703754425049, "time": 1.9435677528381348}, {"layer": 34, "module": "mlp.gate_proj", "avg_loss": 2.754784107208252, "time": 2.0004782676696777}, {"layer": 34, "module": "mlp.down_proj", "avg_loss": 0.46437686681747437, "time": 6.058029651641846}, {"layer": 35, "module": "self_attn.k_proj", "avg_loss": 0.3035978674888611, "time": 1.8662140369415283}, {"layer": 35, "module": "self_attn.v_proj", "avg_loss": 0.2338593602180481, "time": 1.7833995819091797}, {"layer": 35, "module": "self_attn.q_proj", "avg_loss": 1.7921279668807983, "time": 1.8238987922668457}, {"layer": 35, "module": "self_attn.o_proj", "avg_loss": 0.14122521877288818, "time": 1.813868522644043}, {"layer": 35, "module": "mlp.up_proj", "avg_loss": 3.0941574573516846, "time": 1.9128625392913818}, {"layer": 35, "module": "mlp.gate_proj", "avg_loss": 2.764594793319702, "time": 2.294158935546875}, {"layer": 35, "module": "mlp.down_proj", "avg_loss": 0.5455042123794556, "time": 6.009304046630859}, {"layer": 36, "module": "self_attn.k_proj", "avg_loss": 0.31507450342178345, "time": 1.819091558456421}, {"layer": 36, "module": "self_attn.v_proj", "avg_loss": 0.21919776499271393, "time": 1.8915789127349854}, {"layer": 36, "module": "self_attn.q_proj", "avg_loss": 1.7963085174560547, "time": 1.8230538368225098}, {"layer": 36, "module": "self_attn.o_proj", "avg_loss": 0.18991509079933167, "time": 1.8762269020080566}, {"layer": 36, "module": "mlp.up_proj", "avg_loss": 3.329772472381592, "time": 1.9039978981018066}, {"layer": 36, 
"module": "mlp.gate_proj", "avg_loss": 2.974346876144409, "time": 1.9426946640014648}, {"layer": 36, "module": "mlp.down_proj", "avg_loss": 0.721920371055603, "time": 6.081002235412598}, {"layer": 37, "module": "self_attn.k_proj", "avg_loss": 0.3207824230194092, "time": 1.8659489154815674}, {"layer": 37, "module": "self_attn.v_proj", "avg_loss": 0.29244983196258545, "time": 1.8251349925994873}, {"layer": 37, "module": "self_attn.q_proj", "avg_loss": 2.0345664024353027, "time": 1.9204273223876953}, {"layer": 37, "module": "self_attn.o_proj", "avg_loss": 0.1635212004184723, "time": 1.8511970043182373}, {"layer": 37, "module": "mlp.up_proj", "avg_loss": 3.803256034851074, "time": 2.176178455352783}, {"layer": 37, "module": "mlp.gate_proj", "avg_loss": 3.3638880252838135, "time": 1.89837646484375}, {"layer": 37, "module": "mlp.down_proj", "avg_loss": 0.9232335090637207, "time": 6.217904806137085}, {"layer": 38, "module": "self_attn.k_proj", "avg_loss": 0.2940463423728943, "time": 1.943788766860962}, {"layer": 38, "module": "self_attn.v_proj", "avg_loss": 0.29667526483535767, "time": 1.9670839309692383}, {"layer": 38, "module": "self_attn.q_proj", "avg_loss": 1.852968692779541, "time": 1.9792156219482422}, {"layer": 38, "module": "self_attn.o_proj", "avg_loss": 0.2611910104751587, "time": 2.1230762004852295}, {"layer": 38, "module": "mlp.up_proj", "avg_loss": 4.291782379150391, "time": 2.037621021270752}, {"layer": 38, "module": "mlp.gate_proj", "avg_loss": 3.8027501106262207, "time": 1.972111701965332}, {"layer": 38, "module": "mlp.down_proj", "avg_loss": 1.2635704278945923, "time": 6.541654109954834}, {"layer": 39, "module": "self_attn.k_proj", "avg_loss": 0.3206734359264374, "time": 1.811762809753418}, {"layer": 39, "module": "self_attn.v_proj", "avg_loss": 0.35270869731903076, "time": 2.076582908630371}, {"layer": 39, "module": "self_attn.q_proj", "avg_loss": 2.002584457397461, "time": 1.8961608409881592}, {"layer": 39, "module": "self_attn.o_proj", "avg_loss": 
0.2987370491027832, "time": 1.9402952194213867}, {"layer": 39, "module": "mlp.up_proj", "avg_loss": 4.9384026527404785, "time": 2.033674478530884}, {"layer": 39, "module": "mlp.gate_proj", "avg_loss": 4.410443305969238, "time": 1.956012487411499}, {"layer": 39, "module": "mlp.down_proj", "avg_loss": 1.5368967056274414, "time": 6.441681385040283}, {"layer": 40, "module": "self_attn.k_proj", "avg_loss": 0.3278146982192993, "time": 1.8755030632019043}, {"layer": 40, "module": "self_attn.v_proj", "avg_loss": 0.3424249291419983, "time": 1.9238600730895996}, {"layer": 40, "module": "self_attn.q_proj", "avg_loss": 2.0254697799682617, "time": 1.8599348068237305}, {"layer": 40, "module": "self_attn.o_proj", "avg_loss": 0.27714741230010986, "time": 1.9442472457885742}, {"layer": 40, "module": "mlp.up_proj", "avg_loss": 5.402937889099121, "time": 2.025892734527588}, {"layer": 40, "module": "mlp.gate_proj", "avg_loss": 4.846357822418213, "time": 1.962996244430542}, {"layer": 40, "module": "mlp.down_proj", "avg_loss": 1.8547320365905762, "time": 6.049314737319946}, {"layer": 41, "module": "self_attn.k_proj", "avg_loss": 0.3187938630580902, "time": 1.8933558464050293}, {"layer": 41, "module": "self_attn.v_proj", "avg_loss": 0.3812093734741211, "time": 1.8362908363342285}, {"layer": 41, "module": "self_attn.q_proj", "avg_loss": 2.008685827255249, "time": 1.9421918392181396}, {"layer": 41, "module": "self_attn.o_proj", "avg_loss": 0.29723936319351196, "time": 1.9397072792053223}, {"layer": 41, "module": "mlp.up_proj", "avg_loss": 5.978024482727051, "time": 1.9852571487426758}, {"layer": 41, "module": "mlp.gate_proj", "avg_loss": 5.439467430114746, "time": 1.9439404010772705}, {"layer": 41, "module": "mlp.down_proj", "avg_loss": 2.219334125518799, "time": 6.035233736038208}, {"layer": 42, "module": "self_attn.k_proj", "avg_loss": 0.3380385637283325, "time": 1.896632194519043}, {"layer": 42, "module": "self_attn.v_proj", "avg_loss": 0.46606552600860596, "time": 1.8708438873291016}, 
{"layer": 42, "module": "self_attn.q_proj", "avg_loss": 2.214740753173828, "time": 1.930067777633667}, {"layer": 42, "module": "self_attn.o_proj", "avg_loss": 0.3397062420845032, "time": 1.873399257659912}, {"layer": 42, "module": "mlp.up_proj", "avg_loss": 6.618650436401367, "time": 1.954754114151001}, {"layer": 42, "module": "mlp.gate_proj", "avg_loss": 6.043259620666504, "time": 1.946732521057129}, {"layer": 42, "module": "mlp.down_proj", "avg_loss": 2.7733521461486816, "time": 6.116879940032959}, {"layer": 43, "module": "self_attn.k_proj", "avg_loss": 0.30970877408981323, "time": 1.7939622402191162}, {"layer": 43, "module": "self_attn.v_proj", "avg_loss": 0.5388261079788208, "time": 1.8993561267852783}, {"layer": 43, "module": "self_attn.q_proj", "avg_loss": 2.07297945022583, "time": 1.8480706214904785}, {"layer": 43, "module": "self_attn.o_proj", "avg_loss": 0.2613966166973114, "time": 1.8370945453643799}, {"layer": 43, "module": "mlp.up_proj", "avg_loss": 7.230991363525391, "time": 1.9488699436187744}, {"layer": 43, "module": "mlp.gate_proj", "avg_loss": 6.718009948730469, "time": 1.985924482345581}, {"layer": 43, "module": "mlp.down_proj", "avg_loss": 3.056917190551758, "time": 6.059642791748047}, {"layer": 44, "module": "self_attn.k_proj", "avg_loss": 0.3370877802371979, "time": 2.2071480751037598}, {"layer": 44, "module": "self_attn.v_proj", "avg_loss": 0.6168112754821777, "time": 1.8676605224609375}, {"layer": 44, "module": "self_attn.q_proj", "avg_loss": 2.26366925239563, "time": 1.8308823108673096}, {"layer": 44, "module": "self_attn.o_proj", "avg_loss": 0.34490031003952026, "time": 1.8771860599517822}, {"layer": 44, "module": "mlp.up_proj", "avg_loss": 7.789271354675293, "time": 2.1019949913024902}, {"layer": 44, "module": "mlp.gate_proj", "avg_loss": 7.320127487182617, "time": 2.040691614151001}, {"layer": 44, "module": "mlp.down_proj", "avg_loss": 3.444545269012451, "time": 6.251432418823242}, {"layer": 45, "module": "self_attn.k_proj", "avg_loss": 
0.3530714809894562, "time": 1.838623046875}, {"layer": 45, "module": "self_attn.v_proj", "avg_loss": 0.6392807960510254, "time": 1.8270628452301025}, {"layer": 45, "module": "self_attn.q_proj", "avg_loss": 2.312641143798828, "time": 1.8335671424865723}, {"layer": 45, "module": "self_attn.o_proj", "avg_loss": 0.3659293055534363, "time": 1.9195146560668945}, {"layer": 45, "module": "mlp.up_proj", "avg_loss": 8.285255432128906, "time": 1.9829161167144775}, {"layer": 45, "module": "mlp.gate_proj", "avg_loss": 7.861084938049316, "time": 1.932304859161377}, {"layer": 45, "module": "mlp.down_proj", "avg_loss": 3.7955095767974854, "time": 5.971371412277222}, {"layer": 46, "module": "self_attn.k_proj", "avg_loss": 0.36338868737220764, "time": 1.8186569213867188}, {"layer": 46, "module": "self_attn.v_proj", "avg_loss": 0.618753969669342, "time": 1.8102800846099854}, {"layer": 46, "module": "self_attn.q_proj", "avg_loss": 2.318803310394287, "time": 1.8344168663024902}, {"layer": 46, "module": "self_attn.o_proj", "avg_loss": 0.2620357573032379, "time": 1.8447928428649902}, {"layer": 46, "module": "mlp.up_proj", "avg_loss": 8.649650573730469, "time": 1.9012298583984375}, {"layer": 46, "module": "mlp.gate_proj", "avg_loss": 8.302907943725586, "time": 1.9056930541992188}, {"layer": 46, "module": "mlp.down_proj", "avg_loss": 3.921764850616455, "time": 5.995375156402588}, {"layer": 47, "module": "self_attn.k_proj", "avg_loss": 0.4108298420906067, "time": 1.8901162147521973}, {"layer": 47, "module": "self_attn.v_proj", "avg_loss": 0.6499589681625366, "time": 1.8272416591644287}, {"layer": 47, "module": "self_attn.q_proj", "avg_loss": 2.55393648147583, "time": 1.9052097797393799}, {"layer": 47, "module": "self_attn.o_proj", "avg_loss": 0.4667437672615051, "time": 1.8264439105987549}, {"layer": 47, "module": "mlp.up_proj", "avg_loss": 9.285907745361328, "time": 2.1143224239349365}, {"layer": 47, "module": "mlp.gate_proj", "avg_loss": 8.917174339294434, "time": 1.9751040935516357}, 
{"layer": 47, "module": "mlp.down_proj", "avg_loss": 4.372514724731445, "time": 6.154353141784668}, {"layer": 48, "module": "self_attn.k_proj", "avg_loss": 0.42389604449272156, "time": 1.8216540813446045}, {"layer": 48, "module": "self_attn.v_proj", "avg_loss": 0.7187693119049072, "time": 2.093024730682373}, {"layer": 48, "module": "self_attn.q_proj", "avg_loss": 2.677086353302002, "time": 1.846750020980835}, {"layer": 48, "module": "self_attn.o_proj", "avg_loss": 0.34046104550361633, "time": 1.8371195793151855}, {"layer": 48, "module": "mlp.up_proj", "avg_loss": 9.835952758789062, "time": 1.900439977645874}, {"layer": 48, "module": "mlp.gate_proj", "avg_loss": 9.446802139282227, "time": 1.9156501293182373}, {"layer": 48, "module": "mlp.down_proj", "avg_loss": 4.547147750854492, "time": 5.993283271789551}, {"layer": 49, "module": "self_attn.k_proj", "avg_loss": 0.4368930459022522, "time": 1.8431410789489746}, {"layer": 49, "module": "self_attn.v_proj", "avg_loss": 0.8883968591690063, "time": 1.8779730796813965}, {"layer": 49, "module": "self_attn.q_proj", "avg_loss": 2.826608180999756, "time": 1.863166093826294}, {"layer": 49, "module": "self_attn.o_proj", "avg_loss": 0.34190547466278076, "time": 1.932436227798462}, {"layer": 49, "module": "mlp.up_proj", "avg_loss": 10.58414363861084, "time": 2.0124714374542236}, {"layer": 49, "module": "mlp.gate_proj", "avg_loss": 10.151674270629883, "time": 1.9419221878051758}, {"layer": 49, "module": "mlp.down_proj", "avg_loss": 4.811685085296631, "time": 6.3509461879730225}, {"layer": 50, "module": "self_attn.k_proj", "avg_loss": 0.4587148427963257, "time": 1.8830089569091797}, {"layer": 50, "module": "self_attn.v_proj", "avg_loss": 0.9281307458877563, "time": 1.8161141872406006}, {"layer": 50, "module": "self_attn.q_proj", "avg_loss": 2.967343807220459, "time": 1.9045484066009521}, {"layer": 50, "module": "self_attn.o_proj", "avg_loss": 0.4071245789527893, "time": 1.867581844329834}, {"layer": 50, "module": "mlp.up_proj", 
"avg_loss": 11.325093269348145, "time": 1.9045090675354004}, {"layer": 50, "module": "mlp.gate_proj", "avg_loss": 10.827753067016602, "time": 2.006154775619507}, {"layer": 50, "module": "mlp.down_proj", "avg_loss": 5.110833644866943, "time": 5.960044622421265}, {"layer": 51, "module": "self_attn.k_proj", "avg_loss": 0.4568782448768616, "time": 1.88144850730896}, {"layer": 51, "module": "self_attn.v_proj", "avg_loss": 0.890943169593811, "time": 1.822016716003418}, {"layer": 51, "module": "self_attn.q_proj", "avg_loss": 2.8322525024414062, "time": 1.9340732097625732}, {"layer": 51, "module": "self_attn.o_proj", "avg_loss": 0.3959425091743469, "time": 1.8401951789855957}, {"layer": 51, "module": "mlp.up_proj", "avg_loss": 11.951260566711426, "time": 2.0116467475891113}, {"layer": 51, "module": "mlp.gate_proj", "avg_loss": 11.388328552246094, "time": 1.9329392910003662}, {"layer": 51, "module": "mlp.down_proj", "avg_loss": 5.384354591369629, "time": 6.027143478393555}, {"layer": 52, "module": "self_attn.k_proj", "avg_loss": 0.5242612361907959, "time": 1.8713316917419434}, {"layer": 52, "module": "self_attn.v_proj", "avg_loss": 1.0247985124588013, "time": 1.9013481140136719}, {"layer": 52, "module": "self_attn.q_proj", "avg_loss": 3.359513759613037, "time": 1.8228540420532227}, {"layer": 52, "module": "self_attn.o_proj", "avg_loss": 0.5764110088348389, "time": 1.9388468265533447}, {"layer": 52, "module": "mlp.up_proj", "avg_loss": 12.665153503417969, "time": 1.9352874755859375}, {"layer": 52, "module": "mlp.gate_proj", "avg_loss": 11.979243278503418, "time": 1.9933133125305176}, {"layer": 52, "module": "mlp.down_proj", "avg_loss": 5.6481122970581055, "time": 6.128675937652588}, {"layer": 53, "module": "self_attn.k_proj", "avg_loss": 0.4874039888381958, "time": 1.810487985610962}, {"layer": 53, "module": "self_attn.v_proj", "avg_loss": 0.9578018188476562, "time": 1.87398099899292}, {"layer": 53, "module": "self_attn.q_proj", "avg_loss": 3.0273258686065674, "time": 
1.83668851852417}, {"layer": 53, "module": "self_attn.o_proj", "avg_loss": 0.3602977395057678, "time": 2.021329879760742}, {"layer": 53, "module": "mlp.up_proj", "avg_loss": 13.278337478637695, "time": 1.9470360279083252}, {"layer": 53, "module": "mlp.gate_proj", "avg_loss": 12.498838424682617, "time": 1.9352881908416748}, {"layer": 53, "module": "mlp.down_proj", "avg_loss": 5.975218772888184, "time": 6.100816965103149}, {"layer": 54, "module": "self_attn.k_proj", "avg_loss": 0.5246659517288208, "time": 1.931333065032959}, {"layer": 54, "module": "self_attn.v_proj", "avg_loss": 1.2032103538513184, "time": 2.3146939277648926}, {"layer": 54, "module": "self_attn.q_proj", "avg_loss": 3.359302520751953, "time": 1.94272780418396}, {"layer": 54, "module": "self_attn.o_proj", "avg_loss": 0.46342599391937256, "time": 1.9898741245269775}, {"layer": 54, "module": "mlp.up_proj", "avg_loss": 13.803262710571289, "time": 2.0252394676208496}, {"layer": 54, "module": "mlp.gate_proj", "avg_loss": 12.884258270263672, "time": 1.9755973815917969}, {"layer": 54, "module": "mlp.down_proj", "avg_loss": 6.168679237365723, "time": 6.13431453704834}, {"layer": 55, "module": "self_attn.k_proj", "avg_loss": 0.5104924440383911, "time": 1.8360211849212646}, {"layer": 55, "module": "self_attn.v_proj", "avg_loss": 1.198095679283142, "time": 1.8372764587402344}, {"layer": 55, "module": "self_attn.q_proj", "avg_loss": 3.316408395767212, "time": 1.828721046447754}, {"layer": 55, "module": "self_attn.o_proj", "avg_loss": 0.4631623923778534, "time": 1.8980424404144287}, {"layer": 55, "module": "mlp.up_proj", "avg_loss": 14.333661079406738, "time": 1.9578242301940918}, {"layer": 55, "module": "mlp.gate_proj", "avg_loss": 13.275215148925781, "time": 2.016814708709717}, {"layer": 55, "module": "mlp.down_proj", "avg_loss": 6.443043231964111, "time": 6.350236177444458}, {"layer": 56, "module": "self_attn.k_proj", "avg_loss": 0.5237419009208679, "time": 1.7946507930755615}, {"layer": 56, "module": 
"self_attn.v_proj", "avg_loss": 1.1870123147964478, "time": 1.7938802242279053}, {"layer": 56, "module": "self_attn.q_proj", "avg_loss": 3.419738292694092, "time": 1.9857163429260254}, {"layer": 56, "module": "self_attn.o_proj", "avg_loss": 0.6653677821159363, "time": 1.8425400257110596}, {"layer": 56, "module": "mlp.up_proj", "avg_loss": 15.028109550476074, "time": 1.9549403190612793}, {"layer": 56, "module": "mlp.gate_proj", "avg_loss": 13.790353775024414, "time": 2.0429251194000244}, {"layer": 56, "module": "mlp.down_proj", "avg_loss": 6.9286346435546875, "time": 5.990747451782227}, {"layer": 57, "module": "self_attn.k_proj", "avg_loss": 0.5074000358581543, "time": 1.927621841430664}, {"layer": 57, "module": "self_attn.v_proj", "avg_loss": 1.3384346961975098, "time": 1.8077003955841064}, {"layer": 57, "module": "self_attn.q_proj", "avg_loss": 3.5333211421966553, "time": 1.9844741821289062}, {"layer": 57, "module": "self_attn.o_proj", "avg_loss": 1.1616597175598145, "time": 1.944589376449585}, {"layer": 57, "module": "mlp.up_proj", "avg_loss": 15.518880844116211, "time": 1.974867582321167}, {"layer": 57, "module": "mlp.gate_proj", "avg_loss": 14.082801818847656, "time": 2.010927200317383}, {"layer": 57, "module": "mlp.down_proj", "avg_loss": 8.005138397216797, "time": 6.278590679168701}, {"layer": 58, "module": "self_attn.k_proj", "avg_loss": 0.4275645911693573, "time": 1.9751157760620117}, {"layer": 58, "module": "self_attn.v_proj", "avg_loss": 0.8636503219604492, "time": 2.0367674827575684}, {"layer": 58, "module": "self_attn.q_proj", "avg_loss": 2.5915660858154297, "time": 1.9052000045776367}, {"layer": 58, "module": "self_attn.o_proj", "avg_loss": 0.6384254693984985, "time": 1.8598175048828125}, {"layer": 58, "module": "mlp.up_proj", "avg_loss": 15.65725040435791, "time": 2.1883528232574463}, {"layer": 58, "module": "mlp.gate_proj", "avg_loss": 13.969974517822266, "time": 1.969085693359375}, {"layer": 58, "module": "mlp.down_proj", "avg_loss": 
9.035923957824707, "time": 6.101497173309326}, {"layer": 59, "module": "self_attn.k_proj", "avg_loss": 0.3972378075122833, "time": 1.9078431129455566}, {"layer": 59, "module": "self_attn.v_proj", "avg_loss": 0.7083086371421814, "time": 1.8063952922821045}, {"layer": 59, "module": "self_attn.q_proj", "avg_loss": 2.3743932247161865, "time": 1.8245151042938232}, {"layer": 59, "module": "self_attn.o_proj", "avg_loss": 0.7058578133583069, "time": 1.9287500381469727}, {"layer": 59, "module": "mlp.up_proj", "avg_loss": 15.381579399108887, "time": 1.9360861778259277}, {"layer": 59, "module": "mlp.gate_proj", "avg_loss": 13.631448745727539, "time": 1.921344518661499}, {"layer": 59, "module": "mlp.down_proj", "avg_loss": 13.823325157165527, "time": 6.154451847076416}, {"layer": 60, "module": "self_attn.k_proj", "avg_loss": 0.3051488399505615, "time": 1.7982447147369385}, {"layer": 60, "module": "self_attn.v_proj", "avg_loss": 0.5437061786651611, "time": 1.8412508964538574}, {"layer": 60, "module": "self_attn.q_proj", "avg_loss": 1.820873498916626, "time": 2.219529628753662}, {"layer": 60, "module": "self_attn.o_proj", "avg_loss": 0.5784634947776794, "time": 1.8812010288238525}, {"layer": 60, "module": "mlp.up_proj", "avg_loss": 12.867664337158203, "time": 1.9085018634796143}, {"layer": 60, "module": "mlp.gate_proj", "avg_loss": 11.931564331054688, "time": 1.9391067028045654}, {"layer": 60, "module": "mlp.down_proj", "avg_loss": 43.92301940917969, "time": 6.3541553020477295}]
|
quantize_config.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"group_size": 128,
|
4 |
+
"damp_percent": 0.005,
|
5 |
+
"desc_act": false,
|
6 |
+
"static_groups": false,
|
7 |
+
"sym": false,
|
8 |
+
"true_sequential": true,
|
9 |
+
"model_name_or_path": "/monster/data/model/Yi-1.5-34B/quant/autogptq_version_pr640_bit4_group128_seq2048_batch1/damp0.005_descFalse_gptq_symFalse_pack_dataFalse_2024-05-13_10-47-34/ptb0_wikitext21024_gr0_dic0_sen0_det0_rate0",
|
10 |
+
"model_file_base_name": "model",
|
11 |
+
"quant_method": "gptq",
|
12 |
+
"checkpoint_format": "gptq",
|
13 |
+
"meta": {
|
14 |
+
"quantizer": "autogptq:0.8.0.dev1"
|
15 |
+
}
|
16 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
|
3 |
+
size 1033105
|