feihu.hf committed on
Commit
46d2e43
1 Parent(s): a522f73

update weights

Browse files
config.json CHANGED
@@ -8,7 +8,7 @@
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 27392,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
 
8
  "hidden_act": "silu",
9
  "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 27648,
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 35,
14
  "model_type": "qwen2",
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d85e760402974e233ab592c853d6eb8cb0e10576d85032c7f1df1e3416bbd9f9
3
- size 3998665856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4de668333474d7a285deaa6bae2f7e5efd06738f5ba1bbd602d83d4213aa2e4
3
+ size 3947080608
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a87f33082d1176a513f05bf8b321acf04e5a7e593ca7d966595b88223c03afc
3
- size 3956139600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ada2b0935d0e40994796e48f1a404a4d60493d2fb7ba146dbabb1cd52ea63b3f
3
+ size 3985864968
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb8287f05db6fd6268ec1d37cf7e39b60f825e52f6940ba16869ad3aa90c7426
3
- size 3953828136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e996d2678b01df627a148d7dace3ed9e04f8a195d3bb337cdf835c8ae35a3c3f
3
+ size 3953094512
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa61bbc5f99ca27dadfbb1f6c166b4edf8c48cb61dc80880008edf33fc384a9c
3
- size 3996763312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e4959dd115c50c563b570f61fb1adea49d502873913074ec3ef0dc77ca6eee
3
+ size 3985910056
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a81aa5633c2ca78f99599e05e03a1abbedb95266b7f444679002ed0b8d59ecd7
3
- size 3316128672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f548b49f2f5af13fb5bf722c1b871b7032b5bfe3bdc7ca006b941ac055907600
3
+ size 3480450840
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 19221260288
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00005-of-00005.safetensors",
@@ -672,11 +672,11 @@
672
  "model.layers.24.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
673
  "model.layers.24.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
674
  "model.layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors",
675
- "model.layers.25.mlp.down_proj.bias": "model-00002-of-00005.safetensors",
676
- "model.layers.25.mlp.down_proj.g_idx": "model-00002-of-00005.safetensors",
677
- "model.layers.25.mlp.down_proj.qweight": "model-00002-of-00005.safetensors",
678
- "model.layers.25.mlp.down_proj.qzeros": "model-00002-of-00005.safetensors",
679
- "model.layers.25.mlp.down_proj.scales": "model-00002-of-00005.safetensors",
680
  "model.layers.25.mlp.gate_proj.bias": "model-00003-of-00005.safetensors",
681
  "model.layers.25.mlp.gate_proj.g_idx": "model-00003-of-00005.safetensors",
682
  "model.layers.25.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
@@ -1300,7 +1300,7 @@
1300
  "model.layers.4.self_attn.v_proj.qweight": "model-00001-of-00005.safetensors",
1301
  "model.layers.4.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
1302
  "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
1303
- "model.layers.40.input_layernorm.weight": "model-00003-of-00005.safetensors",
1304
  "model.layers.40.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
1305
  "model.layers.40.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
1306
  "model.layers.40.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
@@ -1311,12 +1311,12 @@
1311
  "model.layers.40.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
1312
  "model.layers.40.mlp.gate_proj.qzeros": "model-00003-of-00005.safetensors",
1313
  "model.layers.40.mlp.gate_proj.scales": "model-00003-of-00005.safetensors",
1314
- "model.layers.40.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
1315
- "model.layers.40.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
1316
- "model.layers.40.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
1317
- "model.layers.40.mlp.up_proj.qzeros": "model-00003-of-00005.safetensors",
1318
- "model.layers.40.mlp.up_proj.scales": "model-00003-of-00005.safetensors",
1319
- "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
1320
  "model.layers.40.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
1321
  "model.layers.40.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
1322
  "model.layers.40.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
@@ -1354,26 +1354,26 @@
1354
  "model.layers.41.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
1355
  "model.layers.41.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
1356
  "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
1357
- "model.layers.41.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
1358
- "model.layers.41.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
1359
- "model.layers.41.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
1360
- "model.layers.41.self_attn.k_proj.qzeros": "model-00003-of-00005.safetensors",
1361
- "model.layers.41.self_attn.k_proj.scales": "model-00003-of-00005.safetensors",
1362
- "model.layers.41.self_attn.o_proj.bias": "model-00003-of-00005.safetensors",
1363
- "model.layers.41.self_attn.o_proj.g_idx": "model-00003-of-00005.safetensors",
1364
- "model.layers.41.self_attn.o_proj.qweight": "model-00003-of-00005.safetensors",
1365
- "model.layers.41.self_attn.o_proj.qzeros": "model-00003-of-00005.safetensors",
1366
- "model.layers.41.self_attn.o_proj.scales": "model-00003-of-00005.safetensors",
1367
- "model.layers.41.self_attn.q_proj.bias": "model-00003-of-00005.safetensors",
1368
- "model.layers.41.self_attn.q_proj.g_idx": "model-00003-of-00005.safetensors",
1369
- "model.layers.41.self_attn.q_proj.qweight": "model-00003-of-00005.safetensors",
1370
- "model.layers.41.self_attn.q_proj.qzeros": "model-00003-of-00005.safetensors",
1371
- "model.layers.41.self_attn.q_proj.scales": "model-00003-of-00005.safetensors",
1372
- "model.layers.41.self_attn.v_proj.bias": "model-00003-of-00005.safetensors",
1373
- "model.layers.41.self_attn.v_proj.g_idx": "model-00003-of-00005.safetensors",
1374
- "model.layers.41.self_attn.v_proj.qweight": "model-00003-of-00005.safetensors",
1375
- "model.layers.41.self_attn.v_proj.qzeros": "model-00003-of-00005.safetensors",
1376
- "model.layers.41.self_attn.v_proj.scales": "model-00003-of-00005.safetensors",
1377
  "model.layers.42.input_layernorm.weight": "model-00004-of-00005.safetensors",
1378
  "model.layers.42.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
1379
  "model.layers.42.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
@@ -1929,23 +1929,23 @@
1929
  "model.layers.55.self_attn.v_proj.qweight": "model-00004-of-00005.safetensors",
1930
  "model.layers.55.self_attn.v_proj.qzeros": "model-00004-of-00005.safetensors",
1931
  "model.layers.55.self_attn.v_proj.scales": "model-00004-of-00005.safetensors",
1932
- "model.layers.56.input_layernorm.weight": "model-00004-of-00005.safetensors",
1933
  "model.layers.56.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
1934
  "model.layers.56.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
1935
  "model.layers.56.mlp.down_proj.qweight": "model-00004-of-00005.safetensors",
1936
  "model.layers.56.mlp.down_proj.qzeros": "model-00004-of-00005.safetensors",
1937
  "model.layers.56.mlp.down_proj.scales": "model-00004-of-00005.safetensors",
1938
- "model.layers.56.mlp.gate_proj.bias": "model-00004-of-00005.safetensors",
1939
- "model.layers.56.mlp.gate_proj.g_idx": "model-00004-of-00005.safetensors",
1940
- "model.layers.56.mlp.gate_proj.qweight": "model-00004-of-00005.safetensors",
1941
- "model.layers.56.mlp.gate_proj.qzeros": "model-00004-of-00005.safetensors",
1942
- "model.layers.56.mlp.gate_proj.scales": "model-00004-of-00005.safetensors",
1943
- "model.layers.56.mlp.up_proj.bias": "model-00004-of-00005.safetensors",
1944
- "model.layers.56.mlp.up_proj.g_idx": "model-00004-of-00005.safetensors",
1945
- "model.layers.56.mlp.up_proj.qweight": "model-00004-of-00005.safetensors",
1946
- "model.layers.56.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
1947
- "model.layers.56.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
1948
- "model.layers.56.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
1949
  "model.layers.56.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
1950
  "model.layers.56.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
1951
  "model.layers.56.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
@@ -1983,11 +1983,11 @@
1983
  "model.layers.57.mlp.up_proj.qzeros": "model-00005-of-00005.safetensors",
1984
  "model.layers.57.mlp.up_proj.scales": "model-00005-of-00005.safetensors",
1985
  "model.layers.57.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
1986
- "model.layers.57.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
1987
- "model.layers.57.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
1988
- "model.layers.57.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
1989
- "model.layers.57.self_attn.k_proj.qzeros": "model-00004-of-00005.safetensors",
1990
- "model.layers.57.self_attn.k_proj.scales": "model-00004-of-00005.safetensors",
1991
  "model.layers.57.self_attn.o_proj.bias": "model-00005-of-00005.safetensors",
1992
  "model.layers.57.self_attn.o_proj.g_idx": "model-00005-of-00005.safetensors",
1993
  "model.layers.57.self_attn.o_proj.qweight": "model-00005-of-00005.safetensors",
@@ -2344,8 +2344,8 @@
2344
  "model.layers.9.mlp.down_proj.scales": "model-00001-of-00005.safetensors",
2345
  "model.layers.9.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
2346
  "model.layers.9.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
2347
- "model.layers.9.mlp.gate_proj.qweight": "model-00001-of-00005.safetensors",
2348
- "model.layers.9.mlp.gate_proj.qzeros": "model-00001-of-00005.safetensors",
2349
  "model.layers.9.mlp.gate_proj.scales": "model-00002-of-00005.safetensors",
2350
  "model.layers.9.mlp.up_proj.bias": "model-00002-of-00005.safetensors",
2351
  "model.layers.9.mlp.up_proj.g_idx": "model-00002-of-00005.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 19352135680
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00005-of-00005.safetensors",
 
672
  "model.layers.24.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
673
  "model.layers.24.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
674
  "model.layers.25.input_layernorm.weight": "model-00003-of-00005.safetensors",
675
+ "model.layers.25.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
676
+ "model.layers.25.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
677
+ "model.layers.25.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
678
+ "model.layers.25.mlp.down_proj.qzeros": "model-00003-of-00005.safetensors",
679
+ "model.layers.25.mlp.down_proj.scales": "model-00003-of-00005.safetensors",
680
  "model.layers.25.mlp.gate_proj.bias": "model-00003-of-00005.safetensors",
681
  "model.layers.25.mlp.gate_proj.g_idx": "model-00003-of-00005.safetensors",
682
  "model.layers.25.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
 
1300
  "model.layers.4.self_attn.v_proj.qweight": "model-00001-of-00005.safetensors",
1301
  "model.layers.4.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
1302
  "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
1303
+ "model.layers.40.input_layernorm.weight": "model-00004-of-00005.safetensors",
1304
  "model.layers.40.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
1305
  "model.layers.40.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
1306
  "model.layers.40.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
 
1311
  "model.layers.40.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
1312
  "model.layers.40.mlp.gate_proj.qzeros": "model-00003-of-00005.safetensors",
1313
  "model.layers.40.mlp.gate_proj.scales": "model-00003-of-00005.safetensors",
1314
+ "model.layers.40.mlp.up_proj.bias": "model-00004-of-00005.safetensors",
1315
+ "model.layers.40.mlp.up_proj.g_idx": "model-00004-of-00005.safetensors",
1316
+ "model.layers.40.mlp.up_proj.qweight": "model-00004-of-00005.safetensors",
1317
+ "model.layers.40.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
1318
+ "model.layers.40.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
1319
+ "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
1320
  "model.layers.40.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
1321
  "model.layers.40.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
1322
  "model.layers.40.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
 
1354
  "model.layers.41.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
1355
  "model.layers.41.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
1356
  "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
1357
+ "model.layers.41.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
1358
+ "model.layers.41.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
1359
+ "model.layers.41.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
1360
+ "model.layers.41.self_attn.k_proj.qzeros": "model-00004-of-00005.safetensors",
1361
+ "model.layers.41.self_attn.k_proj.scales": "model-00004-of-00005.safetensors",
1362
+ "model.layers.41.self_attn.o_proj.bias": "model-00004-of-00005.safetensors",
1363
+ "model.layers.41.self_attn.o_proj.g_idx": "model-00004-of-00005.safetensors",
1364
+ "model.layers.41.self_attn.o_proj.qweight": "model-00004-of-00005.safetensors",
1365
+ "model.layers.41.self_attn.o_proj.qzeros": "model-00004-of-00005.safetensors",
1366
+ "model.layers.41.self_attn.o_proj.scales": "model-00004-of-00005.safetensors",
1367
+ "model.layers.41.self_attn.q_proj.bias": "model-00004-of-00005.safetensors",
1368
+ "model.layers.41.self_attn.q_proj.g_idx": "model-00004-of-00005.safetensors",
1369
+ "model.layers.41.self_attn.q_proj.qweight": "model-00004-of-00005.safetensors",
1370
+ "model.layers.41.self_attn.q_proj.qzeros": "model-00004-of-00005.safetensors",
1371
+ "model.layers.41.self_attn.q_proj.scales": "model-00004-of-00005.safetensors",
1372
+ "model.layers.41.self_attn.v_proj.bias": "model-00004-of-00005.safetensors",
1373
+ "model.layers.41.self_attn.v_proj.g_idx": "model-00004-of-00005.safetensors",
1374
+ "model.layers.41.self_attn.v_proj.qweight": "model-00004-of-00005.safetensors",
1375
+ "model.layers.41.self_attn.v_proj.qzeros": "model-00004-of-00005.safetensors",
1376
+ "model.layers.41.self_attn.v_proj.scales": "model-00004-of-00005.safetensors",
1377
  "model.layers.42.input_layernorm.weight": "model-00004-of-00005.safetensors",
1378
  "model.layers.42.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
1379
  "model.layers.42.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
 
1929
  "model.layers.55.self_attn.v_proj.qweight": "model-00004-of-00005.safetensors",
1930
  "model.layers.55.self_attn.v_proj.qzeros": "model-00004-of-00005.safetensors",
1931
  "model.layers.55.self_attn.v_proj.scales": "model-00004-of-00005.safetensors",
1932
+ "model.layers.56.input_layernorm.weight": "model-00005-of-00005.safetensors",
1933
  "model.layers.56.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
1934
  "model.layers.56.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
1935
  "model.layers.56.mlp.down_proj.qweight": "model-00004-of-00005.safetensors",
1936
  "model.layers.56.mlp.down_proj.qzeros": "model-00004-of-00005.safetensors",
1937
  "model.layers.56.mlp.down_proj.scales": "model-00004-of-00005.safetensors",
1938
+ "model.layers.56.mlp.gate_proj.bias": "model-00005-of-00005.safetensors",
1939
+ "model.layers.56.mlp.gate_proj.g_idx": "model-00005-of-00005.safetensors",
1940
+ "model.layers.56.mlp.gate_proj.qweight": "model-00005-of-00005.safetensors",
1941
+ "model.layers.56.mlp.gate_proj.qzeros": "model-00005-of-00005.safetensors",
1942
+ "model.layers.56.mlp.gate_proj.scales": "model-00005-of-00005.safetensors",
1943
+ "model.layers.56.mlp.up_proj.bias": "model-00005-of-00005.safetensors",
1944
+ "model.layers.56.mlp.up_proj.g_idx": "model-00005-of-00005.safetensors",
1945
+ "model.layers.56.mlp.up_proj.qweight": "model-00005-of-00005.safetensors",
1946
+ "model.layers.56.mlp.up_proj.qzeros": "model-00005-of-00005.safetensors",
1947
+ "model.layers.56.mlp.up_proj.scales": "model-00005-of-00005.safetensors",
1948
+ "model.layers.56.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
1949
  "model.layers.56.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
1950
  "model.layers.56.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
1951
  "model.layers.56.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
 
1983
  "model.layers.57.mlp.up_proj.qzeros": "model-00005-of-00005.safetensors",
1984
  "model.layers.57.mlp.up_proj.scales": "model-00005-of-00005.safetensors",
1985
  "model.layers.57.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
1986
+ "model.layers.57.self_attn.k_proj.bias": "model-00005-of-00005.safetensors",
1987
+ "model.layers.57.self_attn.k_proj.g_idx": "model-00005-of-00005.safetensors",
1988
+ "model.layers.57.self_attn.k_proj.qweight": "model-00005-of-00005.safetensors",
1989
+ "model.layers.57.self_attn.k_proj.qzeros": "model-00005-of-00005.safetensors",
1990
+ "model.layers.57.self_attn.k_proj.scales": "model-00005-of-00005.safetensors",
1991
  "model.layers.57.self_attn.o_proj.bias": "model-00005-of-00005.safetensors",
1992
  "model.layers.57.self_attn.o_proj.g_idx": "model-00005-of-00005.safetensors",
1993
  "model.layers.57.self_attn.o_proj.qweight": "model-00005-of-00005.safetensors",
 
2344
  "model.layers.9.mlp.down_proj.scales": "model-00001-of-00005.safetensors",
2345
  "model.layers.9.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
2346
  "model.layers.9.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
2347
+ "model.layers.9.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",
2348
+ "model.layers.9.mlp.gate_proj.qzeros": "model-00002-of-00005.safetensors",
2349
  "model.layers.9.mlp.gate_proj.scales": "model-00002-of-00005.safetensors",
2350
  "model.layers.9.mlp.up_proj.bias": "model-00002-of-00005.safetensors",
2351
  "model.layers.9.mlp.up_proj.g_idx": "model-00002-of-00005.safetensors",