LRL committed on
Commit
a1a067d
1 Parent(s): a2ab7c1

Init Model

Browse files
cal_data_size.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ ptb:0
2
+ wikitext2:1024
3
+ grammar:0
4
+ dict:0
5
+ sentiment:0
6
+ detection:0
7
+ rate:0
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/monster/data/model/Yi-1.5-9B-Chat",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 11008,
14
+ "max_position_embeddings": 4096,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 48,
18
+ "num_key_value_heads": 4,
19
+ "pad_token_id": 0,
20
+ "pretraining_tp": 1,
21
+ "quantization_config": {
22
+ "bits": 4,
23
+ "checkpoint_format": "gptq",
24
+ "damp_percent": 0.005,
25
+ "desc_act": false,
26
+ "group_size": 128,
27
+ "meta": {
28
+ "quantizer": "autogptq:0.8.0.dev1"
29
+ },
30
+ "model_file_base_name": null,
31
+ "model_name_or_path": null,
32
+ "quant_method": "gptq",
33
+ "static_groups": false,
34
+ "sym": false,
35
+ "true_sequential": true
36
+ },
37
+ "rms_norm_eps": 1e-06,
38
+ "rope_scaling": null,
39
+ "rope_theta": 5000000.0,
40
+ "tie_word_embeddings": false,
41
+ "torch_dtype": "float16",
42
+ "transformers_version": "4.40.2",
43
+ "use_cache": false,
44
+ "vocab_size": 64000
45
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6856875ad789f3f982c133c51558df071bfdf72da70b0439d0ad154215e171
3
+ size 5370927624
quant_log.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"layer": 1, "module": "self_attn.k_proj", "avg_loss": 6.07555910140749, "time": 1.1989927291870117}, {"layer": 1, "module": "self_attn.v_proj", "avg_loss": 0.092087707822285, "time": 0.952211856842041}, {"layer": 1, "module": "self_attn.q_proj", "avg_loss": 13.610186864459326, "time": 1.006657361984253}, {"layer": 1, "module": "self_attn.o_proj", "avg_loss": 0.0015188914442819262, "time": 1.0093824863433838}, {"layer": 1, "module": "mlp.up_proj", "avg_loss": 2.984619140625, "time": 1.0060949325561523}, {"layer": 1, "module": "mlp.gate_proj", "avg_loss": 2.8943990071614585, "time": 1.1021208763122559}, {"layer": 1, "module": "mlp.down_proj", "avg_loss": 0.013252967879885719, "time": 2.7407517433166504}, {"layer": 2, "module": "self_attn.k_proj", "avg_loss": 0.6207242693219867, "time": 1.014268159866333}, {"layer": 2, "module": "self_attn.v_proj", "avg_loss": 0.07390364389570933, "time": 0.9791405200958252}, {"layer": 2, "module": "self_attn.q_proj", "avg_loss": 2.0985901847718256, "time": 1.0277860164642334}, {"layer": 2, "module": "self_attn.o_proj", "avg_loss": 0.003262707165309361, "time": 1.0081403255462646}, {"layer": 2, "module": "mlp.up_proj", "avg_loss": 4.488492451016865, "time": 0.9722049236297607}, {"layer": 2, "module": "mlp.gate_proj", "avg_loss": 4.655342223152282, "time": 0.9833333492279053}, {"layer": 2, "module": "mlp.down_proj", "avg_loss": 0.12124758674984887, "time": 2.8704895973205566}, {"layer": 3, "module": "self_attn.k_proj", "avg_loss": 6.5851953900049605, "time": 1.0230138301849365}, {"layer": 3, "module": "self_attn.v_proj", "avg_loss": 0.9290126013377357, "time": 1.3394906520843506}, {"layer": 3, "module": "self_attn.q_proj", "avg_loss": 21.569209991939484, "time": 1.1159183979034424}, {"layer": 3, "module": "self_attn.o_proj", "avg_loss": 0.08918521517799013, "time": 1.0682380199432373}, {"layer": 3, "module": "mlp.up_proj", "avg_loss": 17.113414946056547, "time": 1.1852529048919678}, {"layer": 3, "module": "mlp.gate_proj", 
"avg_loss": 18.142487056671627, "time": 1.0833241939544678}, {"layer": 3, "module": "mlp.down_proj", "avg_loss": 0.2619803897918217, "time": 2.7054529190063477}, {"layer": 4, "module": "self_attn.k_proj", "avg_loss": 10.384496295262897, "time": 0.9760968685150146}, {"layer": 4, "module": "self_attn.v_proj", "avg_loss": 1.4956694103422619, "time": 0.9847433567047119}, {"layer": 4, "module": "self_attn.q_proj", "avg_loss": 35.64081101190476, "time": 0.9789047241210938}, {"layer": 4, "module": "self_attn.o_proj", "avg_loss": 0.06657356686062282, "time": 1.0092568397521973}, {"layer": 4, "module": "mlp.up_proj", "avg_loss": 31.02575489831349, "time": 1.0094764232635498}, {"layer": 4, "module": "mlp.gate_proj", "avg_loss": 33.42299107142857, "time": 1.032710313796997}, {"layer": 4, "module": "mlp.down_proj", "avg_loss": 0.4973341321188306, "time": 3.2422657012939453}, {"layer": 5, "module": "self_attn.k_proj", "avg_loss": 31.18124534970238, "time": 1.0694208145141602}, {"layer": 5, "module": "self_attn.v_proj", "avg_loss": 3.056824214874752, "time": 1.056685447692871}, {"layer": 5, "module": "self_attn.q_proj", "avg_loss": 103.97890314980158, "time": 1.1042323112487793}, {"layer": 5, "module": "self_attn.o_proj", "avg_loss": 0.1741074758862692, "time": 1.0364348888397217}, {"layer": 5, "module": "mlp.up_proj", "avg_loss": 45.92235553075397, "time": 1.0543830394744873}, {"layer": 5, "module": "mlp.gate_proj", "avg_loss": 49.770345052083336, "time": 1.025291919708252}, {"layer": 5, "module": "mlp.down_proj", "avg_loss": 0.7696254064166357, "time": 2.835797071456909}, {"layer": 6, "module": "self_attn.k_proj", "avg_loss": 23.704723539806547, "time": 1.1021196842193604}, {"layer": 6, "module": "self_attn.v_proj", "avg_loss": 4.472733754960317, "time": 1.027282953262329}, {"layer": 6, "module": "self_attn.q_proj", "avg_loss": 97.04109312996032, "time": 1.019303560256958}, {"layer": 6, "module": "self_attn.o_proj", "avg_loss": 0.1761334434388176, "time": 1.0544569492340088}, 
{"layer": 6, "module": "mlp.up_proj", "avg_loss": 62.56038798983135, "time": 1.0191833972930908}, {"layer": 6, "module": "mlp.gate_proj", "avg_loss": 71.06376333085318, "time": 0.9951896667480469}, {"layer": 6, "module": "mlp.down_proj", "avg_loss": 1.4124684409489707, "time": 2.7571372985839844}, {"layer": 7, "module": "self_attn.k_proj", "avg_loss": 33.028932601686506, "time": 0.9937546253204346}, {"layer": 7, "module": "self_attn.v_proj", "avg_loss": 5.222924610925099, "time": 0.9707322120666504}, {"layer": 7, "module": "self_attn.q_proj", "avg_loss": 132.2830791170635, "time": 0.9759719371795654}, {"layer": 7, "module": "self_attn.o_proj", "avg_loss": 0.26717376708984375, "time": 1.0306122303009033}, {"layer": 7, "module": "mlp.up_proj", "avg_loss": 75.07511780753968, "time": 1.0964999198913574}, {"layer": 7, "module": "mlp.gate_proj", "avg_loss": 84.79453435019842, "time": 1.0246057510375977}, {"layer": 7, "module": "mlp.down_proj", "avg_loss": 2.075159708658854, "time": 2.7954280376434326}, {"layer": 8, "module": "self_attn.k_proj", "avg_loss": 43.65645151289682, "time": 0.9816792011260986}, {"layer": 8, "module": "self_attn.v_proj", "avg_loss": 6.0629776243179565, "time": 0.9998762607574463}, {"layer": 8, "module": "self_attn.q_proj", "avg_loss": 161.06556919642858, "time": 0.9835407733917236}, {"layer": 8, "module": "self_attn.o_proj", "avg_loss": 0.5877440921843998, "time": 0.9692838191986084}, {"layer": 8, "module": "mlp.up_proj", "avg_loss": 102.96657986111111, "time": 1.1100773811340332}, {"layer": 8, "module": "mlp.gate_proj", "avg_loss": 120.58316282242063, "time": 1.1160168647766113}, {"layer": 8, "module": "mlp.down_proj", "avg_loss": 3.5943157862103177, "time": 2.947986125946045}, {"layer": 9, "module": "self_attn.k_proj", "avg_loss": 47.172537667410715, "time": 1.3620450496673584}, {"layer": 9, "module": "self_attn.v_proj", "avg_loss": 6.2648421999007935, "time": 1.128283977508545}, {"layer": 9, "module": "self_attn.q_proj", "avg_loss": 
183.23490203373015, "time": 1.1031975746154785}, {"layer": 9, "module": "self_attn.o_proj", "avg_loss": 0.7824783325195312, "time": 1.1453232765197754}, {"layer": 9, "module": "mlp.up_proj", "avg_loss": 142.2412574404762, "time": 1.146754264831543}, {"layer": 9, "module": "mlp.gate_proj", "avg_loss": 159.38802083333334, "time": 1.1283166408538818}, {"layer": 9, "module": "mlp.down_proj", "avg_loss": 26.610279870411706, "time": 2.992835760116577}, {"layer": 10, "module": "self_attn.k_proj", "avg_loss": 59.09978763640873, "time": 1.1743886470794678}, {"layer": 10, "module": "self_attn.v_proj", "avg_loss": 10.807449583023313, "time": 1.1315057277679443}, {"layer": 10, "module": "self_attn.q_proj", "avg_loss": 250.02720424107142, "time": 1.0952115058898926}, {"layer": 10, "module": "self_attn.o_proj", "avg_loss": 0.9591363573831225, "time": 1.1346931457519531}, {"layer": 10, "module": "mlp.up_proj", "avg_loss": 178.00914558531747, "time": 1.1192851066589355}, {"layer": 10, "module": "mlp.gate_proj", "avg_loss": 203.64248511904762, "time": 1.1584949493408203}, {"layer": 10, "module": "mlp.down_proj", "avg_loss": 7.176428416418651, "time": 3.1243090629577637}, {"layer": 11, "module": "self_attn.k_proj", "avg_loss": 52.51483057415675, "time": 1.093508243560791}, {"layer": 11, "module": "self_attn.v_proj", "avg_loss": 9.11791023375496, "time": 1.0927166938781738}, {"layer": 11, "module": "self_attn.q_proj", "avg_loss": 218.51215277777777, "time": 1.1101269721984863}, {"layer": 11, "module": "self_attn.o_proj", "avg_loss": 1.8513372512090773, "time": 1.1158220767974854}, {"layer": 11, "module": "mlp.up_proj", "avg_loss": 210.39605034722223, "time": 1.123816728591919}, {"layer": 11, "module": "mlp.gate_proj", "avg_loss": 237.23685515873015, "time": 1.129089117050171}, {"layer": 11, "module": "mlp.down_proj", "avg_loss": 9.267122783358134, "time": 3.1289165019989014}, {"layer": 12, "module": "self_attn.k_proj", "avg_loss": 65.00314670138889, "time": 1.1199884414672852}, 
{"layer": 12, "module": "self_attn.v_proj", "avg_loss": 12.867935422867063, "time": 1.09584379196167}, {"layer": 12, "module": "self_attn.q_proj", "avg_loss": 323.46490575396825, "time": 1.085179090499878}, {"layer": 12, "module": "self_attn.o_proj", "avg_loss": 2.381710960751488, "time": 1.1105430126190186}, {"layer": 12, "module": "mlp.up_proj", "avg_loss": 246.17277405753967, "time": 1.1257045269012451}, {"layer": 12, "module": "mlp.gate_proj", "avg_loss": 280.3374875992063, "time": 1.1537070274353027}, {"layer": 12, "module": "mlp.down_proj", "avg_loss": 12.485187833271329, "time": 3.2142815589904785}, {"layer": 13, "module": "self_attn.k_proj", "avg_loss": 45.31321304563492, "time": 1.1052143573760986}, {"layer": 13, "module": "self_attn.v_proj", "avg_loss": 12.027442568824405, "time": 1.081942081451416}, {"layer": 13, "module": "self_attn.q_proj", "avg_loss": 217.730220734127, "time": 1.115243673324585}, {"layer": 13, "module": "self_attn.o_proj", "avg_loss": 3.7853340270027283, "time": 1.1477315425872803}, {"layer": 13, "module": "mlp.up_proj", "avg_loss": 254.33545696924602, "time": 1.0996038913726807}, {"layer": 13, "module": "mlp.gate_proj", "avg_loss": 285.34343998015873, "time": 1.09830641746521}, {"layer": 13, "module": "mlp.down_proj", "avg_loss": 15.03830295138889, "time": 3.1277129650115967}, {"layer": 14, "module": "self_attn.k_proj", "avg_loss": 66.7344990079365, "time": 1.1410393714904785}, {"layer": 14, "module": "self_attn.v_proj", "avg_loss": 10.682472834511408, "time": 1.1350440979003906}, {"layer": 14, "module": "self_attn.q_proj", "avg_loss": 299.1382068452381, "time": 1.1012296676635742}, {"layer": 14, "module": "self_attn.o_proj", "avg_loss": 4.155755905877976, "time": 1.1182308197021484}, {"layer": 14, "module": "mlp.up_proj", "avg_loss": 270.98418898809524, "time": 1.1613264083862305}, {"layer": 14, "module": "mlp.gate_proj", "avg_loss": 294.2093253968254, "time": 1.1932356357574463}, {"layer": 14, "module": "mlp.down_proj", "avg_loss": 
16.750895182291668, "time": 3.1275711059570312}, {"layer": 15, "module": "self_attn.k_proj", "avg_loss": 68.70643446180556, "time": 1.119274616241455}, {"layer": 15, "module": "self_attn.v_proj", "avg_loss": 16.287132626488095, "time": 1.1123263835906982}, {"layer": 15, "module": "self_attn.q_proj", "avg_loss": 351.15411086309524, "time": 1.087372064590454}, {"layer": 15, "module": "self_attn.o_proj", "avg_loss": 3.9644869849795388, "time": 1.1200811862945557}, {"layer": 15, "module": "mlp.up_proj", "avg_loss": 294.7902405753968, "time": 1.1353135108947754}, {"layer": 15, "module": "mlp.gate_proj", "avg_loss": 313.3893539186508, "time": 1.1164593696594238}, {"layer": 15, "module": "mlp.down_proj", "avg_loss": 19.49260021391369, "time": 3.067533493041992}, {"layer": 16, "module": "self_attn.k_proj", "avg_loss": 63.73110816592262, "time": 1.0770621299743652}, {"layer": 16, "module": "self_attn.v_proj", "avg_loss": 18.131839812748016, "time": 1.0665881633758545}, {"layer": 16, "module": "self_attn.q_proj", "avg_loss": 324.74317956349205, "time": 1.0468401908874512}, {"layer": 16, "module": "self_attn.o_proj", "avg_loss": 6.21551513671875, "time": 1.0925486087799072}, {"layer": 16, "module": "mlp.up_proj", "avg_loss": 328.2116195436508, "time": 1.104447603225708}, {"layer": 16, "module": "mlp.gate_proj", "avg_loss": 343.67897445436506, "time": 1.104004144668579}, {"layer": 16, "module": "mlp.down_proj", "avg_loss": 21.608770461309526, "time": 2.9482505321502686}, {"layer": 17, "module": "self_attn.k_proj", "avg_loss": 67.37312437996032, "time": 1.151205062866211}, {"layer": 17, "module": "self_attn.v_proj", "avg_loss": 19.769690135168652, "time": 1.1148128509521484}, {"layer": 17, "module": "self_attn.q_proj", "avg_loss": 348.9752914186508, "time": 1.073749303817749}, {"layer": 17, "module": "self_attn.o_proj", "avg_loss": 5.957440088665675, "time": 1.106172800064087}, {"layer": 17, "module": "mlp.up_proj", "avg_loss": 347.47181919642856, "time": 1.163684606552124}, 
{"layer": 17, "module": "mlp.gate_proj", "avg_loss": 358.54668898809524, "time": 1.1471447944641113}, {"layer": 17, "module": "mlp.down_proj", "avg_loss": 25.30935523623512, "time": 3.0883700847625732}, {"layer": 18, "module": "self_attn.k_proj", "avg_loss": 70.67750961061508, "time": 1.0736479759216309}, {"layer": 18, "module": "self_attn.v_proj", "avg_loss": 20.54845222594246, "time": 1.0718200206756592}, {"layer": 18, "module": "self_attn.q_proj", "avg_loss": 382.68557787698415, "time": 1.0638806819915771}, {"layer": 18, "module": "self_attn.o_proj", "avg_loss": 8.565586635044642, "time": 1.0629026889801025}, {"layer": 18, "module": "mlp.up_proj", "avg_loss": 348.93430679563494, "time": 1.0907115936279297}, {"layer": 18, "module": "mlp.gate_proj", "avg_loss": 348.499751984127, "time": 1.0803930759429932}, {"layer": 18, "module": "mlp.down_proj", "avg_loss": 27.27324761284722, "time": 3.0962066650390625}, {"layer": 19, "module": "self_attn.k_proj", "avg_loss": 66.31215897817461, "time": 1.112175703048706}, {"layer": 19, "module": "self_attn.v_proj", "avg_loss": 26.139559306795636, "time": 1.0810468196868896}, {"layer": 19, "module": "self_attn.q_proj", "avg_loss": 385.45101686507934, "time": 1.0614211559295654}, {"layer": 19, "module": "self_attn.o_proj", "avg_loss": 10.591699993799603, "time": 1.1012322902679443}, {"layer": 19, "module": "mlp.up_proj", "avg_loss": 367.1476314484127, "time": 1.0970265865325928}, {"layer": 19, "module": "mlp.gate_proj", "avg_loss": 368.72491939484127, "time": 1.0908219814300537}, {"layer": 19, "module": "mlp.down_proj", "avg_loss": 31.047529916914684, "time": 2.979593276977539}, {"layer": 20, "module": "self_attn.k_proj", "avg_loss": 71.04359654017857, "time": 1.137005090713501}, {"layer": 20, "module": "self_attn.v_proj", "avg_loss": 31.60478283110119, "time": 1.186471939086914}, {"layer": 20, "module": "self_attn.q_proj", "avg_loss": 391.2449466765873, "time": 1.1597630977630615}, {"layer": 20, "module": "self_attn.o_proj", 
"avg_loss": 9.753579760354663, "time": 1.1012401580810547}, {"layer": 20, "module": "mlp.up_proj", "avg_loss": 420.36058407738096, "time": 1.099855661392212}, {"layer": 20, "module": "mlp.gate_proj", "avg_loss": 434.1915302579365, "time": 1.1004929542541504}, {"layer": 20, "module": "mlp.down_proj", "avg_loss": 32.557861328125, "time": 3.0968644618988037}, {"layer": 21, "module": "self_attn.k_proj", "avg_loss": 82.18768601190476, "time": 1.0931856632232666}, {"layer": 21, "module": "self_attn.v_proj", "avg_loss": 23.04560004340278, "time": 1.0619561672210693}, {"layer": 21, "module": "self_attn.q_proj", "avg_loss": 409.4540550595238, "time": 1.1179444789886475}, {"layer": 21, "module": "self_attn.o_proj", "avg_loss": 7.0573226686507935, "time": 1.1001451015472412}, {"layer": 21, "module": "mlp.up_proj", "avg_loss": 432.92010788690476, "time": 1.1255974769592285}, {"layer": 21, "module": "mlp.gate_proj", "avg_loss": 449.8785652281746, "time": 1.10776948928833}, {"layer": 21, "module": "mlp.down_proj", "avg_loss": 31.302162775917658, "time": 3.08528208732605}, {"layer": 22, "module": "self_attn.k_proj", "avg_loss": 84.07867528521825, "time": 1.0940558910369873}, {"layer": 22, "module": "self_attn.v_proj", "avg_loss": 26.186858646453373, "time": 1.0802934169769287}, {"layer": 22, "module": "self_attn.q_proj", "avg_loss": 403.16282242063494, "time": 1.0730643272399902}, {"layer": 22, "module": "self_attn.o_proj", "avg_loss": 5.672463553292411, "time": 1.1538677215576172}, {"layer": 22, "module": "mlp.up_proj", "avg_loss": 424.1214657738095, "time": 1.1301774978637695}, {"layer": 22, "module": "mlp.gate_proj", "avg_loss": 446.7570064484127, "time": 1.1073684692382812}, {"layer": 22, "module": "mlp.down_proj", "avg_loss": 27.49591548859127, "time": 2.989518165588379}, {"layer": 23, "module": "self_attn.k_proj", "avg_loss": 87.36101810515873, "time": 1.0699570178985596}, {"layer": 23, "module": "self_attn.v_proj", "avg_loss": 27.817142547123016, "time": 
1.109374761581421}, {"layer": 23, "module": "self_attn.q_proj", "avg_loss": 425.0745907738095, "time": 1.1540558338165283}, {"layer": 23, "module": "self_attn.o_proj", "avg_loss": 6.969093443855407, "time": 1.0997841358184814}, {"layer": 23, "module": "mlp.up_proj", "avg_loss": 423.90082465277777, "time": 1.1589155197143555}, {"layer": 23, "module": "mlp.gate_proj", "avg_loss": 432.5630580357143, "time": 1.1858537197113037}, {"layer": 23, "module": "mlp.down_proj", "avg_loss": 23.34705558655754, "time": 3.072192668914795}, {"layer": 24, "module": "self_attn.k_proj", "avg_loss": 61.61936538938492, "time": 1.0948021411895752}, {"layer": 24, "module": "self_attn.v_proj", "avg_loss": 36.49243939112103, "time": 1.0816264152526855}, {"layer": 24, "module": "self_attn.q_proj", "avg_loss": 341.06206597222223, "time": 1.1163618564605713}, {"layer": 24, "module": "self_attn.o_proj", "avg_loss": 2.9718126569475447, "time": 1.1334948539733887}, {"layer": 24, "module": "mlp.up_proj", "avg_loss": 399.6433841765873, "time": 1.1013755798339844}, {"layer": 24, "module": "mlp.gate_proj", "avg_loss": 399.83897569444446, "time": 1.1929385662078857}, {"layer": 24, "module": "mlp.down_proj", "avg_loss": 18.28245132688492, "time": 2.953122854232788}, {"layer": 25, "module": "self_attn.k_proj", "avg_loss": 58.99961635044643, "time": 1.0983376502990723}, {"layer": 25, "module": "self_attn.v_proj", "avg_loss": 12.953814794146826, "time": 1.1294598579406738}, {"layer": 25, "module": "self_attn.q_proj", "avg_loss": 230.09988839285714, "time": 1.08957839012146}, {"layer": 25, "module": "self_attn.o_proj", "avg_loss": 4.149057055276538, "time": 1.1181068420410156}, {"layer": 25, "module": "mlp.up_proj", "avg_loss": 315.1570560515873, "time": 1.1241559982299805}, {"layer": 25, "module": "mlp.gate_proj", "avg_loss": 312.53391617063494, "time": 1.1571462154388428}, {"layer": 25, "module": "mlp.down_proj", "avg_loss": 13.899421812996032, "time": 3.0588958263397217}, {"layer": 26, "module": 
"self_attn.k_proj", "avg_loss": 76.80263361855158, "time": 1.064462661743164}, {"layer": 26, "module": "self_attn.v_proj", "avg_loss": 17.88728259858631, "time": 1.0673956871032715}, {"layer": 26, "module": "self_attn.q_proj", "avg_loss": 315.0748697916667, "time": 1.1189231872558594}, {"layer": 26, "module": "self_attn.o_proj", "avg_loss": 3.471431187220982, "time": 1.1842772960662842}, {"layer": 26, "module": "mlp.up_proj", "avg_loss": 317.98418898809524, "time": 1.1545181274414062}, {"layer": 26, "module": "mlp.gate_proj", "avg_loss": 325.34840029761904, "time": 1.0957756042480469}, {"layer": 26, "module": "mlp.down_proj", "avg_loss": 17.539676726810516, "time": 3.104623794555664}, {"layer": 27, "module": "self_attn.k_proj", "avg_loss": 71.45694599454364, "time": 1.1252243518829346}, {"layer": 27, "module": "self_attn.v_proj", "avg_loss": 16.19779556516617, "time": 1.1035175323486328}, {"layer": 27, "module": "self_attn.q_proj", "avg_loss": 297.9996279761905, "time": 1.0766501426696777}, {"layer": 27, "module": "self_attn.o_proj", "avg_loss": 6.460107228112599, "time": 1.103651523590088}, {"layer": 27, "module": "mlp.up_proj", "avg_loss": 332.1039806547619, "time": 1.0983338356018066}, {"layer": 27, "module": "mlp.gate_proj", "avg_loss": 333.4614955357143, "time": 1.1005802154541016}, {"layer": 27, "module": "mlp.down_proj", "avg_loss": 18.76010470920139, "time": 3.023144245147705}, {"layer": 28, "module": "self_attn.k_proj", "avg_loss": 86.26468718998017, "time": 1.1476001739501953}, {"layer": 28, "module": "self_attn.v_proj", "avg_loss": 19.86831519717262, "time": 1.1590206623077393}, {"layer": 28, "module": "self_attn.q_proj", "avg_loss": 418.45855034722223, "time": 1.1424691677093506}, {"layer": 28, "module": "self_attn.o_proj", "avg_loss": 5.406632680741567, "time": 1.0790719985961914}, {"layer": 28, "module": "mlp.up_proj", "avg_loss": 368.95814732142856, "time": 1.1012475490570068}, {"layer": 28, "module": "mlp.gate_proj", "avg_loss": 368.57818700396825, 
"time": 1.080552577972412}, {"layer": 28, "module": "mlp.down_proj", "avg_loss": 22.431332542782737, "time": 2.9886314868927}, {"layer": 29, "module": "self_attn.k_proj", "avg_loss": 60.96727740575397, "time": 1.0725781917572021}, {"layer": 29, "module": "self_attn.v_proj", "avg_loss": 17.263997395833332, "time": 1.069404125213623}, {"layer": 29, "module": "self_attn.q_proj", "avg_loss": 283.6371217757937, "time": 1.1074695587158203}, {"layer": 29, "module": "self_attn.o_proj", "avg_loss": 5.726629348028274, "time": 1.0732767581939697}, {"layer": 29, "module": "mlp.up_proj", "avg_loss": 368.9641617063492, "time": 1.0906786918640137}, {"layer": 29, "module": "mlp.gate_proj", "avg_loss": 361.9811197916667, "time": 1.102978229522705}, {"layer": 29, "module": "mlp.down_proj", "avg_loss": 25.116493830605158, "time": 3.1027069091796875}, {"layer": 30, "module": "self_attn.k_proj", "avg_loss": 84.19173177083333, "time": 1.0682048797607422}, {"layer": 30, "module": "self_attn.v_proj", "avg_loss": 14.512307787698413, "time": 1.0605857372283936}, {"layer": 30, "module": "self_attn.q_proj", "avg_loss": 358.79005456349205, "time": 1.1335175037384033}, {"layer": 30, "module": "self_attn.o_proj", "avg_loss": 7.705166286892361, "time": 1.0986011028289795}, {"layer": 30, "module": "mlp.up_proj", "avg_loss": 381.92925347222223, "time": 1.104168176651001}, {"layer": 30, "module": "mlp.gate_proj", "avg_loss": 365.91269841269843, "time": 1.1318495273590088}, {"layer": 30, "module": "mlp.down_proj", "avg_loss": 26.59479437934028, "time": 2.9911532402038574}, {"layer": 31, "module": "self_attn.k_proj", "avg_loss": 82.68626767113095, "time": 1.0885052680969238}, {"layer": 31, "module": "self_attn.v_proj", "avg_loss": 22.948453388516864, "time": 1.0999746322631836}, {"layer": 31, "module": "self_attn.q_proj", "avg_loss": 415.6162574404762, "time": 1.0833866596221924}, {"layer": 31, "module": "self_attn.o_proj", "avg_loss": 6.402848501054067, "time": 1.152817726135254}, {"layer": 31, 
"module": "mlp.up_proj", "avg_loss": 401.392082093254, "time": 1.1582632064819336}, {"layer": 31, "module": "mlp.gate_proj", "avg_loss": 383.4629836309524, "time": 1.1352782249450684}, {"layer": 31, "module": "mlp.down_proj", "avg_loss": 29.913266136532737, "time": 3.1177799701690674}, {"layer": 32, "module": "self_attn.k_proj", "avg_loss": 74.03087797619048, "time": 1.1143438816070557}, {"layer": 32, "module": "self_attn.v_proj", "avg_loss": 25.16476779513889, "time": 1.104729175567627}, {"layer": 32, "module": "self_attn.q_proj", "avg_loss": 374.09613715277777, "time": 1.10874342918396}, {"layer": 32, "module": "self_attn.o_proj", "avg_loss": 8.47125244140625, "time": 1.145249366760254}, {"layer": 32, "module": "mlp.up_proj", "avg_loss": 444.85909598214283, "time": 1.1328582763671875}, {"layer": 32, "module": "mlp.gate_proj", "avg_loss": 416.3987785218254, "time": 1.1104741096496582}, {"layer": 32, "module": "mlp.down_proj", "avg_loss": 33.63235522073413, "time": 3.2785208225250244}, {"layer": 33, "module": "self_attn.k_proj", "avg_loss": 77.18236917162699, "time": 1.1405746936798096}, {"layer": 33, "module": "self_attn.v_proj", "avg_loss": 26.371787419394842, "time": 1.1544876098632812}, {"layer": 33, "module": "self_attn.q_proj", "avg_loss": 399.5327380952381, "time": 1.103954553604126}, {"layer": 33, "module": "self_attn.o_proj", "avg_loss": 8.668340773809524, "time": 1.098799228668213}, {"layer": 33, "module": "mlp.up_proj", "avg_loss": 462.78679935515873, "time": 1.1377756595611572}, {"layer": 33, "module": "mlp.gate_proj", "avg_loss": 431.08091517857144, "time": 1.1503369808197021}, {"layer": 33, "module": "mlp.down_proj", "avg_loss": 42.20195467509921, "time": 3.0726776123046875}, {"layer": 34, "module": "self_attn.k_proj", "avg_loss": 75.28280009920636, "time": 1.1252851486206055}, {"layer": 34, "module": "self_attn.v_proj", "avg_loss": 27.69292534722222, "time": 1.1175153255462646}, {"layer": 34, "module": "self_attn.q_proj", "avg_loss": 
420.0869915674603, "time": 1.1159067153930664}, {"layer": 34, "module": "self_attn.o_proj", "avg_loss": 13.022717672681052, "time": 1.1240835189819336}, {"layer": 34, "module": "mlp.up_proj", "avg_loss": 477.3080667162698, "time": 1.128347396850586}, {"layer": 34, "module": "mlp.gate_proj", "avg_loss": 431.1564670138889, "time": 1.102379560470581}, {"layer": 34, "module": "mlp.down_proj", "avg_loss": 54.22759331597222, "time": 3.012789011001587}, {"layer": 35, "module": "self_attn.k_proj", "avg_loss": 71.90093315972223, "time": 1.0904264450073242}, {"layer": 35, "module": "self_attn.v_proj", "avg_loss": 37.20582992311508, "time": 1.0949444770812988}, {"layer": 35, "module": "self_attn.q_proj", "avg_loss": 429.65370783730157, "time": 1.0749366283416748}, {"layer": 35, "module": "self_attn.o_proj", "avg_loss": 16.259742373511905, "time": 1.1497812271118164}, {"layer": 35, "module": "mlp.up_proj", "avg_loss": 522.202380952381, "time": 1.1694378852844238}, {"layer": 35, "module": "mlp.gate_proj", "avg_loss": 466.63182043650795, "time": 1.1190478801727295}, {"layer": 35, "module": "mlp.down_proj", "avg_loss": 69.57046750992063, "time": 3.0079715251922607}, {"layer": 36, "module": "self_attn.k_proj", "avg_loss": 75.05033172123017, "time": 1.1308567523956299}, {"layer": 36, "module": "self_attn.v_proj", "avg_loss": 44.142903645833336, "time": 1.0827674865722656}, {"layer": 36, "module": "self_attn.q_proj", "avg_loss": 434.10475570436506, "time": 1.1486053466796875}, {"layer": 36, "module": "self_attn.o_proj", "avg_loss": 17.31020972842262, "time": 1.138695478439331}, {"layer": 36, "module": "mlp.up_proj", "avg_loss": 613.8492683531746, "time": 1.1521124839782715}, {"layer": 36, "module": "mlp.gate_proj", "avg_loss": 559.0457589285714, "time": 1.1000781059265137}, {"layer": 36, "module": "mlp.down_proj", "avg_loss": 83.90668402777777, "time": 3.0185799598693848}, {"layer": 37, "module": "self_attn.k_proj", "avg_loss": 79.34766400049604, "time": 1.094294786453247}, 
{"layer": 37, "module": "self_attn.v_proj", "avg_loss": 38.484250992063494, "time": 1.076303243637085}, {"layer": 37, "module": "self_attn.q_proj", "avg_loss": 446.5607638888889, "time": 1.0777819156646729}, {"layer": 37, "module": "self_attn.o_proj", "avg_loss": 13.15394519624256, "time": 1.0879950523376465}, {"layer": 37, "module": "mlp.up_proj", "avg_loss": 684.8478422619048, "time": 1.108414649963379}, {"layer": 37, "module": "mlp.gate_proj", "avg_loss": 630.9422123015873, "time": 1.1150753498077393}, {"layer": 37, "module": "mlp.down_proj", "avg_loss": 113.50037977430556, "time": 3.142279863357544}, {"layer": 38, "module": "self_attn.k_proj", "avg_loss": 78.22545417906746, "time": 1.1754488945007324}, {"layer": 38, "module": "self_attn.v_proj", "avg_loss": 45.7513175843254, "time": 1.1308307647705078}, {"layer": 38, "module": "self_attn.q_proj", "avg_loss": 452.98939732142856, "time": 1.1087076663970947}, {"layer": 38, "module": "self_attn.o_proj", "avg_loss": 14.88142322358631, "time": 1.1080691814422607}, {"layer": 38, "module": "mlp.up_proj", "avg_loss": 738.2503720238095, "time": 1.116492748260498}, {"layer": 38, "module": "mlp.gate_proj", "avg_loss": 698.8869047619048, "time": 1.1112759113311768}, {"layer": 38, "module": "mlp.down_proj", "avg_loss": 142.57091703869048, "time": 2.992093086242676}, {"layer": 39, "module": "self_attn.k_proj", "avg_loss": 77.48240637400794, "time": 1.0751302242279053}, {"layer": 39, "module": "self_attn.v_proj", "avg_loss": 61.43016803075397, "time": 1.0621981620788574}, {"layer": 39, "module": "self_attn.q_proj", "avg_loss": 484.51165674603175, "time": 1.0612802505493164}, {"layer": 39, "module": "self_attn.o_proj", "avg_loss": 26.506080264136905, "time": 1.1733572483062744}, {"layer": 39, "module": "mlp.up_proj", "avg_loss": 865.737537202381, "time": 1.122154951095581}, {"layer": 39, "module": "mlp.gate_proj", "avg_loss": 812.9959697420635, "time": 1.1136829853057861}, {"layer": 39, "module": "mlp.down_proj", "avg_loss": 
210.9148685515873, "time": 3.123718500137329}, {"layer": 40, "module": "self_attn.k_proj", "avg_loss": 73.62781343005952, "time": 1.1231231689453125}, {"layer": 40, "module": "self_attn.v_proj", "avg_loss": 80.09847005208333, "time": 1.147946834564209}, {"layer": 40, "module": "self_attn.q_proj", "avg_loss": 514.6433531746031, "time": 1.0724728107452393}, {"layer": 40, "module": "self_attn.o_proj", "avg_loss": 25.965244838169642, "time": 1.1276135444641113}, {"layer": 40, "module": "mlp.up_proj", "avg_loss": 958.301773313492, "time": 1.1323199272155762}, {"layer": 40, "module": "mlp.gate_proj", "avg_loss": 893.0778149801587, "time": 1.1335737705230713}, {"layer": 40, "module": "mlp.down_proj", "avg_loss": 246.909908234127, "time": 3.082585573196411}, {"layer": 41, "module": "self_attn.k_proj", "avg_loss": 72.3613513764881, "time": 1.1117403507232666}, {"layer": 41, "module": "self_attn.v_proj", "avg_loss": 86.30533854166667, "time": 1.1176605224609375}, {"layer": 41, "module": "self_attn.q_proj", "avg_loss": 508.73108878968253, "time": 1.0997130870819092}, {"layer": 41, "module": "self_attn.o_proj", "avg_loss": 37.696661086309526, "time": 1.1893908977508545}, {"layer": 41, "module": "mlp.up_proj", "avg_loss": 1071.069320436508, "time": 1.2465028762817383}, {"layer": 41, "module": "mlp.gate_proj", "avg_loss": 984.3379836309524, "time": 1.1275596618652344}, {"layer": 41, "module": "mlp.down_proj", "avg_loss": 327.76714409722223, "time": 2.944676637649536}, {"layer": 42, "module": "self_attn.k_proj", "avg_loss": 68.64381820436508, "time": 1.1787502765655518}, {"layer": 42, "module": "self_attn.v_proj", "avg_loss": 125.99906218998017, "time": 1.0443668365478516}, {"layer": 42, "module": "self_attn.q_proj", "avg_loss": 512.225601438492, "time": 1.0385754108428955}, {"layer": 42, "module": "self_attn.o_proj", "avg_loss": 37.76715184771825, "time": 1.0405468940734863}, {"layer": 42, "module": "mlp.up_proj", "avg_loss": 1169.889384920635, "time": 1.114497423171997}, 
{"layer": 42, "module": "mlp.gate_proj", "avg_loss": 1065.779513888889, "time": 1.1930928230285645}, {"layer": 42, "module": "mlp.down_proj", "avg_loss": 405.1393229166667, "time": 3.083534002304077}, {"layer": 43, "module": "self_attn.k_proj", "avg_loss": 81.92943948412699, "time": 1.1625447273254395}, {"layer": 43, "module": "self_attn.v_proj", "avg_loss": 132.0229259672619, "time": 1.0916264057159424}, {"layer": 43, "module": "self_attn.q_proj", "avg_loss": 569.2506200396825, "time": 1.0812914371490479}, {"layer": 43, "module": "self_attn.o_proj", "avg_loss": 43.317173549107146, "time": 1.1420626640319824}, {"layer": 43, "module": "mlp.up_proj", "avg_loss": 1327.8570188492063, "time": 1.150813102722168}, {"layer": 43, "module": "mlp.gate_proj", "avg_loss": 1187.5653521825398, "time": 1.1393060684204102}, {"layer": 43, "module": "mlp.down_proj", "avg_loss": 497.22296626984127, "time": 3.0828840732574463}, {"layer": 44, "module": "self_attn.k_proj", "avg_loss": 79.07071552579364, "time": 1.0908374786376953}, {"layer": 44, "module": "self_attn.v_proj", "avg_loss": 169.04341827876985, "time": 1.063915729522705}, {"layer": 44, "module": "self_attn.q_proj", "avg_loss": 598.2323908730159, "time": 1.0754973888397217}, {"layer": 44, "module": "self_attn.o_proj", "avg_loss": 39.792759486607146, "time": 1.0803074836730957}, {"layer": 44, "module": "mlp.up_proj", "avg_loss": 1472.5091765873017, "time": 1.0850367546081543}, {"layer": 44, "module": "mlp.gate_proj", "avg_loss": 1293.859871031746, "time": 1.1461353302001953}, {"layer": 44, "module": "mlp.down_proj", "avg_loss": 590.3867807539683, "time": 3.159071683883667}, {"layer": 45, "module": "self_attn.k_proj", "avg_loss": 73.09985739087301, "time": 1.0878758430480957}, {"layer": 45, "module": "self_attn.v_proj", "avg_loss": 215.34427703373015, "time": 1.0897216796875}, {"layer": 45, "module": "self_attn.q_proj", "avg_loss": 598.0025421626984, "time": 1.126990795135498}, {"layer": 45, "module": "self_attn.o_proj", 
"avg_loss": 61.860715835813494, "time": 1.1584274768829346}, {"layer": 45, "module": "mlp.up_proj", "avg_loss": 1641.1445932539682, "time": 1.1527249813079834}, {"layer": 45, "module": "mlp.gate_proj", "avg_loss": 1434.617435515873, "time": 1.4167754650115967}, {"layer": 45, "module": "mlp.down_proj", "avg_loss": 650.5510912698413, "time": 3.1583237648010254}, {"layer": 46, "module": "self_attn.k_proj", "avg_loss": 69.84474206349206, "time": 1.0867373943328857}, {"layer": 46, "module": "self_attn.v_proj", "avg_loss": 169.17049541170636, "time": 1.0883288383483887}, {"layer": 46, "module": "self_attn.q_proj", "avg_loss": 536.2960069444445, "time": 1.075913429260254}, {"layer": 46, "module": "self_attn.o_proj", "avg_loss": 43.32450164310516, "time": 1.052882432937622}, {"layer": 46, "module": "mlp.up_proj", "avg_loss": 1818.9634176587301, "time": 1.0722603797912598}, {"layer": 46, "module": "mlp.gate_proj", "avg_loss": 1583.813492063492, "time": 1.0873894691467285}, {"layer": 46, "module": "mlp.down_proj", "avg_loss": 747.479972718254, "time": 3.0045576095581055}, {"layer": 47, "module": "self_attn.k_proj", "avg_loss": 71.28694661458333, "time": 1.0766124725341797}, {"layer": 47, "module": "self_attn.v_proj", "avg_loss": 158.01224578373015, "time": 1.0701532363891602}, {"layer": 47, "module": "self_attn.q_proj", "avg_loss": 569.1432291666666, "time": 1.0826919078826904}, {"layer": 47, "module": "self_attn.o_proj", "avg_loss": 122.73910280257937, "time": 1.09765625}, {"layer": 47, "module": "mlp.up_proj", "avg_loss": 1906.8876488095239, "time": 1.2466888427734375}, {"layer": 47, "module": "mlp.gate_proj", "avg_loss": 1672.8942212301588, "time": 1.1620750427246094}, {"layer": 47, "module": "mlp.down_proj", "avg_loss": 937.4084821428571, "time": 3.011678695678711}, {"layer": 48, "module": "self_attn.k_proj", "avg_loss": 62.798223586309526, "time": 1.0594446659088135}, {"layer": 48, "module": "self_attn.v_proj", "avg_loss": 128.58834015376985, "time": 
1.0552213191986084}, {"layer": 48, "module": "self_attn.q_proj", "avg_loss": 507.88002232142856, "time": 1.2131154537200928}, {"layer": 48, "module": "self_attn.o_proj", "avg_loss": 86.47597346230158, "time": 1.1948423385620117}, {"layer": 48, "module": "mlp.up_proj", "avg_loss": 2034.5111607142858, "time": 1.2073075771331787}, {"layer": 48, "module": "mlp.gate_proj", "avg_loss": 1869.7926587301588, "time": 1.203303337097168}, {"layer": 48, "module": "mlp.down_proj", "avg_loss": 1927.1170634920634, "time": 3.007661819458008}]
quantize_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "damp_percent": 0.005,
5
+ "desc_act": false,
6
+ "static_groups": false,
7
+ "sym": false,
8
+ "true_sequential": true,
9
+ "model_name_or_path": "/monster/data/model/Yi-1.5-9B-Chat/quant/autogptq_version_pr640_bit4_group128_seq2048_batch1/damp0.005_descFalse_gptq_symFalse_pack_dataTrue_2024-05-14_01-30-25/ptb0_wikitext21024_gr0_dic0_sen0_det0_rate0",
10
+ "model_file_base_name": "model",
11
+ "quant_method": "gptq",
12
+ "checkpoint_format": "gptq",
13
+ "meta": {
14
+ "quantizer": "autogptq:0.8.0.dev1"
15
+ }
16
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
+ size 1033105