leonardlin
commited on
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- config.json +50 -0
- model.safetensors +3 -0
- quant_log.csv +281 -0
- quantize_config.json +21 -0
- special_tokens_map.json +24 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": true,
|
3 |
+
"_name_or_path": "/fsx/ubuntu/.cache/huggingface/hub/models--cyberagent--Mistral-Nemo-Japanese-Instruct-2408/snapshots/8591f78522a5d651209fc9f354c848508db7a3eb",
|
4 |
+
"architectures": [
|
5 |
+
"MistralForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 131072,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 5120,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 1024000,
|
16 |
+
"model_type": "mistral",
|
17 |
+
"num_attention_heads": 32,
|
18 |
+
"num_hidden_layers": 40,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"pad_token_id": 10,
|
21 |
+
"quantization_config": {
|
22 |
+
"bits": 4,
|
23 |
+
"checkpoint_format": "gptq",
|
24 |
+
"desc_act": true,
|
25 |
+
"dynamic": null,
|
26 |
+
"group_size": 128,
|
27 |
+
"lm_head": false,
|
28 |
+
"meta": {
|
29 |
+
"damp_auto_increment": 0.0025,
|
30 |
+
"damp_percent": 0.01,
|
31 |
+
"mse": 0.0,
|
32 |
+
"quantizer": [
|
33 |
+
"gptqmodel:1.7.0"
|
34 |
+
],
|
35 |
+
"static_groups": false,
|
36 |
+
"true_sequential": true,
|
37 |
+
"uri": "https://github.com/modelcloud/gptqmodel"
|
38 |
+
},
|
39 |
+
"quant_method": "gptq",
|
40 |
+
"sym": true
|
41 |
+
},
|
42 |
+
"rms_norm_eps": 1e-05,
|
43 |
+
"rope_theta": 1000000.0,
|
44 |
+
"sliding_window": null,
|
45 |
+
"tie_word_embeddings": false,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"transformers_version": "4.48.0",
|
48 |
+
"use_cache": false,
|
49 |
+
"vocab_size": 131074
|
50 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4813d00a78878a856a35f4fd5a13093c84a218fe0bd8facf2852b0ee5175f116
|
3 |
+
size 8357992904
|
quant_log.csv
ADDED
@@ -0,0 +1,281 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,damp,time
|
2 |
+
0,self_attn.k_proj,0.44781,0.01000,2.558
|
3 |
+
0,self_attn.v_proj,0.00408,0.01000,1.591
|
4 |
+
0,self_attn.q_proj,0.98203,0.01000,1.669
|
5 |
+
0,self_attn.o_proj,0.00040,0.01000,1.427
|
6 |
+
0,mlp.up_proj,0.32010,0.01000,1.935
|
7 |
+
0,mlp.gate_proj,0.34654,0.01000,1.882
|
8 |
+
0,mlp.down_proj,1.33519,0.01000,4.953
|
9 |
+
1,self_attn.k_proj,0.79557,0.01000,1.652
|
10 |
+
1,self_attn.v_proj,0.09610,0.01000,1.600
|
11 |
+
1,self_attn.q_proj,1.61869,0.01000,1.649
|
12 |
+
1,self_attn.o_proj,0.00187,0.01000,1.381
|
13 |
+
1,mlp.up_proj,0.70102,0.01000,1.944
|
14 |
+
1,mlp.gate_proj,0.79192,0.01000,1.883
|
15 |
+
1,mlp.down_proj,0.00560,0.01000,4.833
|
16 |
+
2,self_attn.k_proj,0.62088,0.01000,1.599
|
17 |
+
2,self_attn.v_proj,0.11477,0.01000,1.554
|
18 |
+
2,self_attn.q_proj,1.35341,0.01000,1.647
|
19 |
+
2,self_attn.o_proj,0.00412,0.01000,1.339
|
20 |
+
2,mlp.up_proj,1.57853,0.01000,1.903
|
21 |
+
2,mlp.gate_proj,1.90023,0.01000,1.864
|
22 |
+
2,mlp.down_proj,0.09663,0.01000,4.863
|
23 |
+
3,self_attn.k_proj,1.38215,0.01000,1.575
|
24 |
+
3,self_attn.v_proj,0.20538,0.01000,1.537
|
25 |
+
3,self_attn.q_proj,3.34128,0.01000,1.612
|
26 |
+
3,self_attn.o_proj,0.00580,0.01000,1.327
|
27 |
+
3,mlp.up_proj,1.92553,0.01000,1.876
|
28 |
+
3,mlp.gate_proj,2.43931,0.01000,1.840
|
29 |
+
3,mlp.down_proj,0.02724,0.01000,4.785
|
30 |
+
4,self_attn.k_proj,1.00719,0.01000,1.654
|
31 |
+
4,self_attn.v_proj,0.24622,0.01000,1.594
|
32 |
+
4,self_attn.q_proj,2.40160,0.01000,1.668
|
33 |
+
4,self_attn.o_proj,0.01123,0.01000,1.305
|
34 |
+
4,mlp.up_proj,2.56081,0.01000,1.885
|
35 |
+
4,mlp.gate_proj,3.37699,0.01000,1.873
|
36 |
+
4,mlp.down_proj,0.04623,0.01000,4.708
|
37 |
+
5,self_attn.k_proj,1.34865,0.01000,1.644
|
38 |
+
5,self_attn.v_proj,0.26768,0.01000,1.599
|
39 |
+
5,self_attn.q_proj,3.37576,0.01000,1.678
|
40 |
+
5,self_attn.o_proj,0.01259,0.01000,1.384
|
41 |
+
5,mlp.up_proj,3.42371,0.01000,1.943
|
42 |
+
5,mlp.gate_proj,4.33173,0.01000,1.935
|
43 |
+
5,mlp.down_proj,0.07047,0.01000,4.969
|
44 |
+
6,self_attn.k_proj,1.53340,0.01000,1.648
|
45 |
+
6,self_attn.v_proj,0.26733,0.01000,1.511
|
46 |
+
6,self_attn.q_proj,3.57458,0.01000,1.699
|
47 |
+
6,self_attn.o_proj,0.02864,0.01000,1.394
|
48 |
+
6,mlp.up_proj,4.14674,0.01000,1.947
|
49 |
+
6,mlp.gate_proj,5.33200,0.01000,1.906
|
50 |
+
6,mlp.down_proj,0.10116,0.01000,4.945
|
51 |
+
7,self_attn.k_proj,1.56764,0.01000,1.649
|
52 |
+
7,self_attn.v_proj,0.40399,0.01000,1.576
|
53 |
+
7,self_attn.q_proj,3.97499,0.01000,1.688
|
54 |
+
7,self_attn.o_proj,0.04408,0.01000,1.354
|
55 |
+
7,mlp.up_proj,4.87630,0.01000,1.925
|
56 |
+
7,mlp.gate_proj,5.94558,0.01000,1.907
|
57 |
+
7,mlp.down_proj,0.12133,0.01000,4.885
|
58 |
+
8,self_attn.k_proj,1.67540,0.01000,1.645
|
59 |
+
8,self_attn.v_proj,0.41133,0.01000,1.577
|
60 |
+
8,self_attn.q_proj,4.18493,0.01000,1.660
|
61 |
+
8,self_attn.o_proj,0.04868,0.01000,1.363
|
62 |
+
8,mlp.up_proj,5.63061,0.01000,1.925
|
63 |
+
8,mlp.gate_proj,6.37594,0.01000,1.888
|
64 |
+
8,mlp.down_proj,0.14860,0.01000,4.889
|
65 |
+
9,self_attn.k_proj,1.87517,0.01000,1.640
|
66 |
+
9,self_attn.v_proj,0.61056,0.01000,1.580
|
67 |
+
9,self_attn.q_proj,4.68167,0.01000,1.656
|
68 |
+
9,self_attn.o_proj,0.07940,0.01000,1.412
|
69 |
+
9,mlp.up_proj,6.18172,0.01000,1.917
|
70 |
+
9,mlp.gate_proj,6.56218,0.01000,1.905
|
71 |
+
9,mlp.down_proj,0.17478,0.01000,5.157
|
72 |
+
10,self_attn.k_proj,1.81094,0.01000,1.573
|
73 |
+
10,self_attn.v_proj,0.56928,0.01000,1.504
|
74 |
+
10,self_attn.q_proj,4.66256,0.01000,1.577
|
75 |
+
10,self_attn.o_proj,0.09339,0.01000,1.279
|
76 |
+
10,mlp.up_proj,6.81094,0.01000,1.820
|
77 |
+
10,mlp.gate_proj,6.95654,0.01000,1.800
|
78 |
+
10,mlp.down_proj,0.20208,0.01000,4.639
|
79 |
+
11,self_attn.k_proj,2.23242,0.01000,1.593
|
80 |
+
11,self_attn.v_proj,0.63557,0.01000,1.457
|
81 |
+
11,self_attn.q_proj,5.68136,0.01000,1.523
|
82 |
+
11,self_attn.o_proj,0.11066,0.01000,1.269
|
83 |
+
11,mlp.up_proj,7.74163,0.01000,1.829
|
84 |
+
11,mlp.gate_proj,8.52850,0.01000,1.793
|
85 |
+
11,mlp.down_proj,0.21735,0.01000,4.708
|
86 |
+
12,self_attn.k_proj,2.14512,0.01000,1.607
|
87 |
+
12,self_attn.v_proj,0.81472,0.01000,1.551
|
88 |
+
12,self_attn.q_proj,5.52804,0.01000,1.626
|
89 |
+
12,self_attn.o_proj,0.11780,0.01000,1.354
|
90 |
+
12,mlp.up_proj,8.57336,0.01000,1.839
|
91 |
+
12,mlp.gate_proj,9.36451,0.01000,1.836
|
92 |
+
12,mlp.down_proj,0.24418,0.01000,4.781
|
93 |
+
13,self_attn.k_proj,2.74346,0.01000,1.556
|
94 |
+
13,self_attn.v_proj,0.86579,0.01000,1.513
|
95 |
+
13,self_attn.q_proj,7.15528,0.01000,1.588
|
96 |
+
13,self_attn.o_proj,0.14770,0.01000,1.308
|
97 |
+
13,mlp.up_proj,9.27285,0.01000,1.836
|
98 |
+
13,mlp.gate_proj,9.76769,0.01000,1.809
|
99 |
+
13,mlp.down_proj,0.30997,0.01000,4.784
|
100 |
+
14,self_attn.k_proj,3.07935,0.01000,1.578
|
101 |
+
14,self_attn.v_proj,0.97355,0.01000,1.510
|
102 |
+
14,self_attn.q_proj,8.04559,0.01000,1.576
|
103 |
+
14,self_attn.o_proj,0.13438,0.01000,1.289
|
104 |
+
14,mlp.up_proj,10.34739,0.01000,1.816
|
105 |
+
14,mlp.gate_proj,10.32479,0.01000,1.803
|
106 |
+
14,mlp.down_proj,0.33069,0.01000,4.592
|
107 |
+
15,self_attn.k_proj,2.76976,0.01000,1.560
|
108 |
+
15,self_attn.v_proj,0.90191,0.01000,1.491
|
109 |
+
15,self_attn.q_proj,6.81594,0.01000,1.598
|
110 |
+
15,self_attn.o_proj,0.15275,0.01000,1.304
|
111 |
+
15,mlp.up_proj,10.78706,0.01000,1.839
|
112 |
+
15,mlp.gate_proj,10.14920,0.01000,1.803
|
113 |
+
15,mlp.down_proj,0.37820,0.01000,4.648
|
114 |
+
16,self_attn.k_proj,2.89506,0.01000,1.562
|
115 |
+
16,self_attn.v_proj,1.10457,0.01000,1.498
|
116 |
+
16,self_attn.q_proj,7.11564,0.01000,1.566
|
117 |
+
16,self_attn.o_proj,0.19220,0.01000,1.312
|
118 |
+
16,mlp.up_proj,11.51908,0.01000,1.792
|
119 |
+
16,mlp.gate_proj,10.49399,0.01000,1.799
|
120 |
+
16,mlp.down_proj,0.45250,0.01000,4.627
|
121 |
+
17,self_attn.k_proj,2.67134,0.01000,1.536
|
122 |
+
17,self_attn.v_proj,1.29427,0.01000,1.468
|
123 |
+
17,self_attn.q_proj,7.23714,0.01000,1.547
|
124 |
+
17,self_attn.o_proj,0.25853,0.01000,1.279
|
125 |
+
17,mlp.up_proj,12.74986,0.01000,1.829
|
126 |
+
17,mlp.gate_proj,11.32418,0.01000,1.808
|
127 |
+
17,mlp.down_proj,0.56758,0.01000,4.654
|
128 |
+
18,self_attn.k_proj,3.18899,0.01000,1.538
|
129 |
+
18,self_attn.v_proj,1.35795,0.01000,1.476
|
130 |
+
18,self_attn.q_proj,8.24376,0.01000,1.549
|
131 |
+
18,self_attn.o_proj,0.33599,0.01000,1.268
|
132 |
+
18,mlp.up_proj,14.05334,0.01000,1.830
|
133 |
+
18,mlp.gate_proj,13.10215,0.01000,1.806
|
134 |
+
18,mlp.down_proj,0.67178,0.01000,4.703
|
135 |
+
19,self_attn.k_proj,3.17280,0.01000,1.526
|
136 |
+
19,self_attn.v_proj,1.92177,0.01000,1.471
|
137 |
+
19,self_attn.q_proj,9.49425,0.01000,1.546
|
138 |
+
19,self_attn.o_proj,0.25507,0.01000,1.270
|
139 |
+
19,mlp.up_proj,14.84812,0.01000,1.802
|
140 |
+
19,mlp.gate_proj,14.16978,0.01000,1.778
|
141 |
+
19,mlp.down_proj,0.72501,0.01000,4.595
|
142 |
+
20,self_attn.k_proj,3.33606,0.01000,1.537
|
143 |
+
20,self_attn.v_proj,1.73617,0.01000,1.466
|
144 |
+
20,self_attn.q_proj,9.49494,0.01000,1.541
|
145 |
+
20,self_attn.o_proj,0.22753,0.01000,1.269
|
146 |
+
20,mlp.up_proj,16.01300,0.01000,1.824
|
147 |
+
20,mlp.gate_proj,16.25274,0.01000,1.774
|
148 |
+
20,mlp.down_proj,0.81018,0.01000,4.630
|
149 |
+
21,self_attn.k_proj,3.22668,0.01000,1.535
|
150 |
+
21,self_attn.v_proj,1.52237,0.01000,1.477
|
151 |
+
21,self_attn.q_proj,8.96638,0.01000,1.542
|
152 |
+
21,self_attn.o_proj,0.19833,0.01000,1.264
|
153 |
+
21,mlp.up_proj,17.61123,0.01000,1.805
|
154 |
+
21,mlp.gate_proj,18.52325,0.01000,1.736
|
155 |
+
21,mlp.down_proj,0.93764,0.01000,4.572
|
156 |
+
22,self_attn.k_proj,3.14562,0.01000,1.531
|
157 |
+
22,self_attn.v_proj,2.11292,0.01000,1.581
|
158 |
+
22,self_attn.q_proj,9.41047,0.01000,1.644
|
159 |
+
22,self_attn.o_proj,0.25399,0.01000,1.343
|
160 |
+
22,mlp.up_proj,19.57599,0.01000,1.922
|
161 |
+
22,mlp.gate_proj,21.05490,0.01000,1.860
|
162 |
+
22,mlp.down_proj,1.18958,0.01000,4.783
|
163 |
+
23,self_attn.k_proj,3.40537,0.01000,1.583
|
164 |
+
23,self_attn.v_proj,2.11464,0.01000,1.519
|
165 |
+
23,self_attn.q_proj,9.70642,0.01000,1.642
|
166 |
+
23,self_attn.o_proj,0.25255,0.01000,1.316
|
167 |
+
23,mlp.up_proj,20.87299,0.01000,1.851
|
168 |
+
23,mlp.gate_proj,22.15608,0.01000,1.801
|
169 |
+
23,mlp.down_proj,1.37670,0.01000,4.740
|
170 |
+
24,self_attn.k_proj,3.75815,0.01000,1.583
|
171 |
+
24,self_attn.v_proj,2.27121,0.01000,1.525
|
172 |
+
24,self_attn.q_proj,10.81197,0.01000,1.593
|
173 |
+
24,self_attn.o_proj,0.34032,0.01000,1.311
|
174 |
+
24,mlp.up_proj,22.62236,0.01000,1.854
|
175 |
+
24,mlp.gate_proj,24.52068,0.01000,1.801
|
176 |
+
24,mlp.down_proj,1.42703,0.01000,4.738
|
177 |
+
25,self_attn.k_proj,4.13512,0.01000,1.573
|
178 |
+
25,self_attn.v_proj,2.44008,0.01000,1.531
|
179 |
+
25,self_attn.q_proj,11.73339,0.01000,1.592
|
180 |
+
25,self_attn.o_proj,0.24015,0.01000,1.303
|
181 |
+
25,mlp.up_proj,24.11332,0.01000,1.864
|
182 |
+
25,mlp.gate_proj,26.56258,0.01000,1.850
|
183 |
+
25,mlp.down_proj,1.58580,0.01000,4.749
|
184 |
+
26,self_attn.k_proj,4.14904,0.01000,1.581
|
185 |
+
26,self_attn.v_proj,2.82799,0.01000,1.522
|
186 |
+
26,self_attn.q_proj,11.67034,0.01000,1.617
|
187 |
+
26,self_attn.o_proj,0.26139,0.01000,1.316
|
188 |
+
26,mlp.up_proj,26.17431,0.01000,1.871
|
189 |
+
26,mlp.gate_proj,29.35139,0.01000,1.812
|
190 |
+
26,mlp.down_proj,1.69596,0.01000,4.668
|
191 |
+
27,self_attn.k_proj,4.35806,0.01000,1.562
|
192 |
+
27,self_attn.v_proj,3.28379,0.01000,1.510
|
193 |
+
27,self_attn.q_proj,12.86621,0.01000,1.556
|
194 |
+
27,self_attn.o_proj,0.19922,0.01000,1.291
|
195 |
+
27,mlp.up_proj,28.43472,0.01000,1.818
|
196 |
+
27,mlp.gate_proj,32.34059,0.01000,1.805
|
197 |
+
27,mlp.down_proj,1.85067,0.01000,4.677
|
198 |
+
28,self_attn.k_proj,4.58890,0.01000,1.558
|
199 |
+
28,self_attn.v_proj,3.38870,0.01000,1.544
|
200 |
+
28,self_attn.q_proj,12.99836,0.01000,1.609
|
201 |
+
28,self_attn.o_proj,0.40315,0.01000,1.288
|
202 |
+
28,mlp.up_proj,30.73429,0.01000,1.838
|
203 |
+
28,mlp.gate_proj,34.49887,0.01000,1.803
|
204 |
+
28,mlp.down_proj,2.12352,0.01000,4.715
|
205 |
+
29,self_attn.k_proj,4.48693,0.01000,1.547
|
206 |
+
29,self_attn.v_proj,4.56156,0.01000,1.494
|
207 |
+
29,self_attn.q_proj,13.25546,0.01000,1.557
|
208 |
+
29,self_attn.o_proj,0.52033,0.01000,1.288
|
209 |
+
29,mlp.up_proj,33.86432,0.01000,1.838
|
210 |
+
29,mlp.gate_proj,38.18370,0.01000,1.827
|
211 |
+
29,mlp.down_proj,2.39000,0.01000,4.666
|
212 |
+
30,self_attn.k_proj,5.06714,0.01000,1.558
|
213 |
+
30,self_attn.v_proj,4.07894,0.01000,1.493
|
214 |
+
30,self_attn.q_proj,14.36550,0.01000,1.553
|
215 |
+
30,self_attn.o_proj,0.32974,0.01000,1.292
|
216 |
+
30,mlp.up_proj,37.26863,0.01000,1.833
|
217 |
+
30,mlp.gate_proj,42.20907,0.01000,1.812
|
218 |
+
30,mlp.down_proj,2.83160,0.01000,4.676
|
219 |
+
31,self_attn.k_proj,4.59491,0.01000,1.558
|
220 |
+
31,self_attn.v_proj,4.15888,0.01000,1.484
|
221 |
+
31,self_attn.q_proj,13.28740,0.01000,1.571
|
222 |
+
31,self_attn.o_proj,0.52802,0.01000,1.299
|
223 |
+
31,mlp.up_proj,40.37086,0.01000,1.822
|
224 |
+
31,mlp.gate_proj,44.68778,0.01000,1.842
|
225 |
+
31,mlp.down_proj,3.32915,0.01000,4.691
|
226 |
+
32,self_attn.k_proj,4.66300,0.01000,1.562
|
227 |
+
32,self_attn.v_proj,5.71529,0.01000,1.564
|
228 |
+
32,self_attn.q_proj,14.51872,0.01000,1.615
|
229 |
+
32,self_attn.o_proj,0.50025,0.01000,1.299
|
230 |
+
32,mlp.up_proj,43.65151,0.01000,1.881
|
231 |
+
32,mlp.gate_proj,47.48747,0.01000,1.856
|
232 |
+
32,mlp.down_proj,3.97576,0.01000,4.794
|
233 |
+
33,self_attn.k_proj,5.03356,0.01000,1.614
|
234 |
+
33,self_attn.v_proj,4.62488,0.01000,1.533
|
235 |
+
33,self_attn.q_proj,15.38677,0.01000,1.649
|
236 |
+
33,self_attn.o_proj,0.70857,0.01000,1.304
|
237 |
+
33,mlp.up_proj,46.69897,0.01000,1.818
|
238 |
+
33,mlp.gate_proj,49.17135,0.01000,1.795
|
239 |
+
33,mlp.down_proj,5.12732,0.01000,4.632
|
240 |
+
34,self_attn.k_proj,4.54628,0.01000,1.548
|
241 |
+
34,self_attn.v_proj,9.44273,0.01000,1.499
|
242 |
+
34,self_attn.q_proj,14.90588,0.01000,1.561
|
243 |
+
34,self_attn.o_proj,1.40365,0.01000,1.295
|
244 |
+
34,mlp.up_proj,51.07263,0.01000,1.830
|
245 |
+
34,mlp.gate_proj,53.14738,0.01000,1.809
|
246 |
+
34,mlp.down_proj,5.80323,0.01000,4.666
|
247 |
+
35,self_attn.k_proj,4.90025,0.01000,1.562
|
248 |
+
35,self_attn.v_proj,8.61317,0.01000,1.610
|
249 |
+
35,self_attn.q_proj,15.80265,0.01000,1.706
|
250 |
+
35,self_attn.o_proj,0.94142,0.01000,1.394
|
251 |
+
35,mlp.up_proj,55.80676,0.01000,1.944
|
252 |
+
35,mlp.gate_proj,57.10633,0.01000,1.925
|
253 |
+
35,mlp.down_proj,7.06579,0.01000,4.926
|
254 |
+
36,self_attn.k_proj,4.54035,0.01000,1.636
|
255 |
+
36,self_attn.v_proj,10.06557,0.01000,1.587
|
256 |
+
36,self_attn.q_proj,15.33328,0.01000,1.639
|
257 |
+
36,self_attn.o_proj,1.34831,0.01000,1.369
|
258 |
+
36,mlp.up_proj,60.14465,0.01000,1.918
|
259 |
+
36,mlp.gate_proj,59.68049,0.01000,1.918
|
260 |
+
36,mlp.down_proj,9.04149,0.01000,4.909
|
261 |
+
37,self_attn.k_proj,4.00503,0.01000,1.643
|
262 |
+
37,self_attn.v_proj,11.37017,0.01000,1.575
|
263 |
+
37,self_attn.q_proj,15.57561,0.01000,1.655
|
264 |
+
37,self_attn.o_proj,3.25508,0.01000,1.351
|
265 |
+
37,mlp.up_proj,60.57056,0.01000,1.938
|
266 |
+
37,mlp.gate_proj,60.60658,0.01000,1.881
|
267 |
+
37,mlp.down_proj,11.87961,0.01000,4.893
|
268 |
+
38,self_attn.k_proj,4.87617,0.01000,1.625
|
269 |
+
38,self_attn.v_proj,16.71599,0.01000,1.587
|
270 |
+
38,self_attn.q_proj,17.29685,0.01000,1.665
|
271 |
+
38,self_attn.o_proj,3.02600,0.01000,1.354
|
272 |
+
38,mlp.up_proj,60.15144,0.01000,1.916
|
273 |
+
38,mlp.gate_proj,61.47735,0.01000,1.893
|
274 |
+
38,mlp.down_proj,15.26236,0.01000,4.955
|
275 |
+
39,self_attn.k_proj,3.92169,0.01000,1.618
|
276 |
+
39,self_attn.v_proj,9.01665,0.01000,1.519
|
277 |
+
39,self_attn.q_proj,14.09749,0.01000,1.915
|
278 |
+
39,self_attn.o_proj,2.40031,0.01000,1.344
|
279 |
+
39,mlp.up_proj,60.70610,0.01000,1.977
|
280 |
+
39,mlp.gate_proj,64.54776,0.01000,1.937
|
281 |
+
39,mlp.down_proj,25.92845,0.01000,4.972
|
quantize_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"dynamic": null,
|
4 |
+
"group_size": 128,
|
5 |
+
"desc_act": true,
|
6 |
+
"sym": true,
|
7 |
+
"lm_head": false,
|
8 |
+
"quant_method": "gptq",
|
9 |
+
"checkpoint_format": "gptq",
|
10 |
+
"meta": {
|
11 |
+
"quantizer": [
|
12 |
+
"gptqmodel:1.7.0"
|
13 |
+
],
|
14 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
15 |
+
"damp_percent": 0.01,
|
16 |
+
"damp_auto_increment": 0.0025,
|
17 |
+
"static_groups": false,
|
18 |
+
"true_sequential": true,
|
19 |
+
"mse": 0.0
|
20 |
+
}
|
21 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|im_end|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<pad>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<unk>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea88f9940a84ab7e0100bc369506a28ec8d5d821691dc47d4dd63f1bbdf105ed
|
3 |
+
size 17078669
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|