Commit
•
0b80d89
1
Parent(s):
f3c8733
修改tie_emb_prj_weight为tie_word_embeddings
Browse files
- bert-base-chinese/bert4torch_config.json +1 -1
- bloom-560m/bert4torch_config.json +1 -1
- bloomz-560m/bert4torch_config.json +1 -1
- chatglm-6b-int4/bert4torch_config.json +1 -1
- chatglm-6b-int8/bert4torch_config.json +1 -1
- chatglm-6b-v0.1.0/bert4torch_config.json +1 -1
- chatglm-6b/bert4torch_config.json +1 -1
- chatglm2-6b-32k/bert4torch_config.json +1 -1
- chatglm2-6b-int4/bert4torch_config.json +1 -1
- chatglm2-6b/bert4torch_config.json +1 -1
- chatglm3-6b-32k/bert4torch_config.json +1 -1
- chatglm3-6b/bert4torch_config.json +1 -1
bert-base-chinese/bert4torch_config.json
CHANGED
@@ -13,5 +13,5 @@
|
|
13 |
"pad_token_id": 0,
|
14 |
"type_vocab_size": 2,
|
15 |
"vocab_size": 21128,
|
16 |
-
"tie_emb_prj_weight": true
|
17 |
}
|
|
|
13 |
"pad_token_id": 0,
|
14 |
"type_vocab_size": 2,
|
15 |
"vocab_size": 21128,
|
16 |
+
"tie_word_embeddings": true
|
17 |
}
|
bloom-560m/bert4torch_config.json
CHANGED
@@ -22,6 +22,6 @@
|
|
22 |
"vocab_size": 250880,
|
23 |
"segment_vocab_size": 0,
|
24 |
"pre_layernorm": true,
|
25 |
-
"
|
26 |
"model": "bloom"
|
27 |
}
|
|
|
22 |
"vocab_size": 250880,
|
23 |
"segment_vocab_size": 0,
|
24 |
"pre_layernorm": true,
|
25 |
+
"tie_word_embeddings": true,
|
26 |
"model": "bloom"
|
27 |
}
|
bloomz-560m/bert4torch_config.json
CHANGED
@@ -22,6 +22,6 @@
|
|
22 |
"vocab_size": 250880,
|
23 |
"segment_vocab_size": 0,
|
24 |
"pre_layernorm": true,
|
25 |
-
"
|
26 |
"model": "bloom"
|
27 |
}
|
|
|
22 |
"vocab_size": 250880,
|
23 |
"segment_vocab_size": 0,
|
24 |
"pre_layernorm": true,
|
25 |
+
"tie_word_embeddings": true,
|
26 |
"model": "bloom"
|
27 |
}
|
chatglm-6b-int4/bert4torch_config.json
CHANGED
@@ -18,7 +18,7 @@
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
-
"
|
22 |
"quantization_bit": 4,
|
23 |
"quantization_method": "cpm_kernels",
|
24 |
"target_modules": ["q", "k", "v", "o", "intermediateDense", "outputDense"],
|
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
+
"tie_word_embeddings": false,
|
22 |
"quantization_bit": 4,
|
23 |
"quantization_method": "cpm_kernels",
|
24 |
"target_modules": ["q", "k", "v", "o", "intermediateDense", "outputDense"],
|
chatglm-6b-int8/bert4torch_config.json
CHANGED
@@ -21,6 +21,6 @@
|
|
21 |
"quantization_bit": 8,
|
22 |
"quantization_method": "cpm_kernels",
|
23 |
"target_modules": ["q", "k", "v", "o", "intermediateDense", "outputDense"],
|
24 |
-
"
|
25 |
"generation_config": {"max_length": 2048}
|
26 |
}
|
|
|
21 |
"quantization_bit": 8,
|
22 |
"quantization_method": "cpm_kernels",
|
23 |
"target_modules": ["q", "k", "v", "o", "intermediateDense", "outputDense"],
|
24 |
+
"tie_word_embeddings": false,
|
25 |
"generation_config": {"max_length": 2048}
|
26 |
}
|
chatglm-6b-v0.1.0/bert4torch_config.json
CHANGED
@@ -18,6 +18,6 @@
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
-
"
|
22 |
"generation_config": {"max_length": 2048}
|
23 |
}
|
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
+
"tie_word_embeddings": false,
|
22 |
"generation_config": {"max_length": 2048}
|
23 |
}
|
chatglm-6b/bert4torch_config.json
CHANGED
@@ -18,6 +18,6 @@
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
-
"
|
22 |
"generation_config": {"max_length": 2048}
|
23 |
}
|
|
|
18 |
"segment_vocab_size": 0,
|
19 |
"skip_init": true,
|
20 |
"rope_rank": "updown",
|
21 |
+
"tie_word_embeddings": false,
|
22 |
"generation_config": {"max_length": 2048}
|
23 |
}
|
chatglm2-6b-32k/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
-
"
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
chatglm2-6b-int4/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
-
"
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
chatglm2-6b/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
-
"
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
chatglm3-6b-32k/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
-
"
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 2,
|
17 |
"rmsnorm": true,
|
chatglm3-6b/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
-
"
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 0,
|
17 |
"rmsnorm": true,
|
|
|
11 |
"segment_vocab_size": 0,
|
12 |
"multi_query_group_num": 2,
|
13 |
"skip_init": true,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
"eos_token_id": 2,
|
16 |
"pad_token_id": 0,
|
17 |
"rmsnorm": true,
|