feihu.hf
committed on
Commit
•
80ba958
1
Parent(s):
3971207
update weights
Browse files
- config.json +1 -1
- model-00001-of-00005.safetensors +2 -2
- model-00002-of-00005.safetensors +2 -2
- model-00003-of-00005.safetensors +2 -2
- model-00004-of-00005.safetensors +2 -2
- model.safetensors.index.json +39 -39
config.json
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
"hidden_act": "silu",
|
9 |
"hidden_size": 5120,
|
10 |
"initializer_range": 0.02,
|
11 |
-
"intermediate_size":
|
12 |
"max_position_embeddings": 32768,
|
13 |
"max_window_layers": 35,
|
14 |
"model_type": "qwen2",
|
|
|
8 |
"hidden_act": "silu",
|
9 |
"hidden_size": 5120,
|
10 |
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 14336,
|
12 |
"max_position_embeddings": 32768,
|
13 |
"max_window_layers": 35,
|
14 |
"model_type": "qwen2",
|
model-00001-of-00005.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1feec0077fd4055e9fb6a5db24240a81ac41ff117cda1a67670a6149c3791f6
|
3 |
+
size 3995509184
|
model-00002-of-00005.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4479a9d40667ba0fbcd6edba3a18dc552e5550b59fe438721b260ece78abfae1
|
3 |
+
size 3995886232
|
model-00003-of-00005.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41c4b900993032a3f94adc2f20a593bf3884bfc7b089a7061137e52f20cf120d
|
3 |
+
size 3995886344
|
model-00004-of-00005.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d6701d5280d5764aa071fae569cc7535b563c7b5efa6cdd81964a90abf78008
|
3 |
+
size 2889484712
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00005-of-00005.safetensors",
|
@@ -413,16 +413,16 @@
|
|
413 |
"model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
|
414 |
"model.layers.18.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
|
415 |
"model.layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
416 |
-
"model.layers.19.mlp.down_proj.bias": "model-
|
417 |
-
"model.layers.19.mlp.down_proj.g_idx": "model-
|
418 |
-
"model.layers.19.mlp.down_proj.qweight": "model-
|
419 |
-
"model.layers.19.mlp.down_proj.qzeros": "model-
|
420 |
-
"model.layers.19.mlp.down_proj.scales": "model-
|
421 |
-
"model.layers.19.mlp.gate_proj.bias": "model-
|
422 |
-
"model.layers.19.mlp.gate_proj.g_idx": "model-
|
423 |
-
"model.layers.19.mlp.gate_proj.qweight": "model-
|
424 |
-
"model.layers.19.mlp.gate_proj.qzeros": "model-
|
425 |
-
"model.layers.19.mlp.gate_proj.scales": "model-
|
426 |
"model.layers.19.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
|
427 |
"model.layers.19.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
|
428 |
"model.layers.19.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
|
@@ -930,23 +930,23 @@
|
|
930 |
"model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00005.safetensors",
|
931 |
"model.layers.30.self_attn.v_proj.qzeros": "model-00003-of-00005.safetensors",
|
932 |
"model.layers.30.self_attn.v_proj.scales": "model-00003-of-00005.safetensors",
|
933 |
-
"model.layers.31.input_layernorm.weight": "model-
|
934 |
-
"model.layers.31.mlp.down_proj.bias": "model-
|
935 |
-
"model.layers.31.mlp.down_proj.g_idx": "model-
|
936 |
-
"model.layers.31.mlp.down_proj.qweight": "model-
|
937 |
-
"model.layers.31.mlp.down_proj.qzeros": "model-
|
938 |
-
"model.layers.31.mlp.down_proj.scales": "model-
|
939 |
-
"model.layers.31.mlp.gate_proj.bias": "model-
|
940 |
-
"model.layers.31.mlp.gate_proj.g_idx": "model-
|
941 |
-
"model.layers.31.mlp.gate_proj.qweight": "model-
|
942 |
-
"model.layers.31.mlp.gate_proj.qzeros": "model-
|
943 |
-
"model.layers.31.mlp.gate_proj.scales": "model-
|
944 |
-
"model.layers.31.mlp.up_proj.bias": "model-
|
945 |
-
"model.layers.31.mlp.up_proj.g_idx": "model-
|
946 |
-
"model.layers.31.mlp.up_proj.qweight": "model-
|
947 |
-
"model.layers.31.mlp.up_proj.qzeros": "model-
|
948 |
-
"model.layers.31.mlp.up_proj.scales": "model-
|
949 |
-
"model.layers.31.post_attention_layernorm.weight": "model-
|
950 |
"model.layers.31.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
|
951 |
"model.layers.31.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
|
952 |
"model.layers.31.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
|
@@ -984,14 +984,14 @@
|
|
984 |
"model.layers.32.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
|
985 |
"model.layers.32.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
|
986 |
"model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
987 |
-
"model.layers.32.self_attn.k_proj.bias": "model-
|
988 |
-
"model.layers.32.self_attn.k_proj.g_idx": "model-
|
989 |
-
"model.layers.32.self_attn.k_proj.qweight": "model-
|
990 |
-
"model.layers.32.self_attn.k_proj.qzeros": "model-
|
991 |
-
"model.layers.32.self_attn.k_proj.scales": "model-
|
992 |
"model.layers.32.self_attn.o_proj.bias": "model-00004-of-00005.safetensors",
|
993 |
"model.layers.32.self_attn.o_proj.g_idx": "model-00004-of-00005.safetensors",
|
994 |
-
"model.layers.32.self_attn.o_proj.qweight": "model-
|
995 |
"model.layers.32.self_attn.o_proj.qzeros": "model-00004-of-00005.safetensors",
|
996 |
"model.layers.32.self_attn.o_proj.scales": "model-00004-of-00005.safetensors",
|
997 |
"model.layers.32.self_attn.q_proj.bias": "model-00004-of-00005.safetensors",
|
@@ -1375,11 +1375,11 @@
|
|
1375 |
"model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
|
1376 |
"model.layers.6.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
|
1377 |
"model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
1378 |
-
"model.layers.7.mlp.down_proj.bias": "model-
|
1379 |
-
"model.layers.7.mlp.down_proj.g_idx": "model-
|
1380 |
-
"model.layers.7.mlp.down_proj.qweight": "model-
|
1381 |
-
"model.layers.7.mlp.down_proj.qzeros": "model-
|
1382 |
-
"model.layers.7.mlp.down_proj.scales": "model-
|
1383 |
"model.layers.7.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
|
1384 |
"model.layers.7.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
|
1385 |
"model.layers.7.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 16433735680
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00005-of-00005.safetensors",
|
|
|
413 |
"model.layers.18.self_attn.v_proj.qzeros": "model-00002-of-00005.safetensors",
|
414 |
"model.layers.18.self_attn.v_proj.scales": "model-00002-of-00005.safetensors",
|
415 |
"model.layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
|
416 |
+
"model.layers.19.mlp.down_proj.bias": "model-00003-of-00005.safetensors",
|
417 |
+
"model.layers.19.mlp.down_proj.g_idx": "model-00003-of-00005.safetensors",
|
418 |
+
"model.layers.19.mlp.down_proj.qweight": "model-00003-of-00005.safetensors",
|
419 |
+
"model.layers.19.mlp.down_proj.qzeros": "model-00003-of-00005.safetensors",
|
420 |
+
"model.layers.19.mlp.down_proj.scales": "model-00003-of-00005.safetensors",
|
421 |
+
"model.layers.19.mlp.gate_proj.bias": "model-00003-of-00005.safetensors",
|
422 |
+
"model.layers.19.mlp.gate_proj.g_idx": "model-00003-of-00005.safetensors",
|
423 |
+
"model.layers.19.mlp.gate_proj.qweight": "model-00003-of-00005.safetensors",
|
424 |
+
"model.layers.19.mlp.gate_proj.qzeros": "model-00003-of-00005.safetensors",
|
425 |
+
"model.layers.19.mlp.gate_proj.scales": "model-00003-of-00005.safetensors",
|
426 |
"model.layers.19.mlp.up_proj.bias": "model-00003-of-00005.safetensors",
|
427 |
"model.layers.19.mlp.up_proj.g_idx": "model-00003-of-00005.safetensors",
|
428 |
"model.layers.19.mlp.up_proj.qweight": "model-00003-of-00005.safetensors",
|
|
|
930 |
"model.layers.30.self_attn.v_proj.qweight": "model-00003-of-00005.safetensors",
|
931 |
"model.layers.30.self_attn.v_proj.qzeros": "model-00003-of-00005.safetensors",
|
932 |
"model.layers.30.self_attn.v_proj.scales": "model-00003-of-00005.safetensors",
|
933 |
+
"model.layers.31.input_layernorm.weight": "model-00004-of-00005.safetensors",
|
934 |
+
"model.layers.31.mlp.down_proj.bias": "model-00004-of-00005.safetensors",
|
935 |
+
"model.layers.31.mlp.down_proj.g_idx": "model-00004-of-00005.safetensors",
|
936 |
+
"model.layers.31.mlp.down_proj.qweight": "model-00004-of-00005.safetensors",
|
937 |
+
"model.layers.31.mlp.down_proj.qzeros": "model-00004-of-00005.safetensors",
|
938 |
+
"model.layers.31.mlp.down_proj.scales": "model-00004-of-00005.safetensors",
|
939 |
+
"model.layers.31.mlp.gate_proj.bias": "model-00004-of-00005.safetensors",
|
940 |
+
"model.layers.31.mlp.gate_proj.g_idx": "model-00004-of-00005.safetensors",
|
941 |
+
"model.layers.31.mlp.gate_proj.qweight": "model-00004-of-00005.safetensors",
|
942 |
+
"model.layers.31.mlp.gate_proj.qzeros": "model-00004-of-00005.safetensors",
|
943 |
+
"model.layers.31.mlp.gate_proj.scales": "model-00004-of-00005.safetensors",
|
944 |
+
"model.layers.31.mlp.up_proj.bias": "model-00004-of-00005.safetensors",
|
945 |
+
"model.layers.31.mlp.up_proj.g_idx": "model-00004-of-00005.safetensors",
|
946 |
+
"model.layers.31.mlp.up_proj.qweight": "model-00004-of-00005.safetensors",
|
947 |
+
"model.layers.31.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
|
948 |
+
"model.layers.31.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
|
949 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
950 |
"model.layers.31.self_attn.k_proj.bias": "model-00003-of-00005.safetensors",
|
951 |
"model.layers.31.self_attn.k_proj.g_idx": "model-00003-of-00005.safetensors",
|
952 |
"model.layers.31.self_attn.k_proj.qweight": "model-00003-of-00005.safetensors",
|
|
|
984 |
"model.layers.32.mlp.up_proj.qzeros": "model-00004-of-00005.safetensors",
|
985 |
"model.layers.32.mlp.up_proj.scales": "model-00004-of-00005.safetensors",
|
986 |
"model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
|
987 |
+
"model.layers.32.self_attn.k_proj.bias": "model-00004-of-00005.safetensors",
|
988 |
+
"model.layers.32.self_attn.k_proj.g_idx": "model-00004-of-00005.safetensors",
|
989 |
+
"model.layers.32.self_attn.k_proj.qweight": "model-00004-of-00005.safetensors",
|
990 |
+
"model.layers.32.self_attn.k_proj.qzeros": "model-00004-of-00005.safetensors",
|
991 |
+
"model.layers.32.self_attn.k_proj.scales": "model-00004-of-00005.safetensors",
|
992 |
"model.layers.32.self_attn.o_proj.bias": "model-00004-of-00005.safetensors",
|
993 |
"model.layers.32.self_attn.o_proj.g_idx": "model-00004-of-00005.safetensors",
|
994 |
+
"model.layers.32.self_attn.o_proj.qweight": "model-00004-of-00005.safetensors",
|
995 |
"model.layers.32.self_attn.o_proj.qzeros": "model-00004-of-00005.safetensors",
|
996 |
"model.layers.32.self_attn.o_proj.scales": "model-00004-of-00005.safetensors",
|
997 |
"model.layers.32.self_attn.q_proj.bias": "model-00004-of-00005.safetensors",
|
|
|
1375 |
"model.layers.6.self_attn.v_proj.qzeros": "model-00001-of-00005.safetensors",
|
1376 |
"model.layers.6.self_attn.v_proj.scales": "model-00001-of-00005.safetensors",
|
1377 |
"model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
|
1378 |
+
"model.layers.7.mlp.down_proj.bias": "model-00002-of-00005.safetensors",
|
1379 |
+
"model.layers.7.mlp.down_proj.g_idx": "model-00002-of-00005.safetensors",
|
1380 |
+
"model.layers.7.mlp.down_proj.qweight": "model-00002-of-00005.safetensors",
|
1381 |
+
"model.layers.7.mlp.down_proj.qzeros": "model-00002-of-00005.safetensors",
|
1382 |
+
"model.layers.7.mlp.down_proj.scales": "model-00002-of-00005.safetensors",
|
1383 |
"model.layers.7.mlp.gate_proj.bias": "model-00002-of-00005.safetensors",
|
1384 |
"model.layers.7.mlp.gate_proj.g_idx": "model-00002-of-00005.safetensors",
|
1385 |
"model.layers.7.mlp.gate_proj.qweight": "model-00002-of-00005.safetensors",
|