Tongjilibo committed on
Commit
16a0f86
1 Parent(s): 9b6343b

add config

Browse files
ChatYuan-large-v2/bert4torch_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "mt5.1.1",
3
+ "hidden_act": "gelu",
4
+ "hidden_dropout_prob": 0.1,
5
+ "hidden_size": 1024,
6
+ "intermediate_size": 2816,
7
+ "num_attention_heads": 16,
8
+ "attention_head_size": 64,
9
+ "num_hidden_layers": 24,
10
+ "vocab_size": 32128,
11
+ "relative_attention_num_buckets": 32,
12
+ "attention_scale": false,
13
+ "is_dropout": true,
14
+ "max_position_embeddings": 768,
15
+ "segment_vocab_size": 0,
16
+ "logit_scale": false
17
+ }
Erlangshen-DeBERTa-v2-320M-Chinese/bert4torch_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "deberta_v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 1024,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 4096,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "c2p|p2c",
15
+ "conv_kernel_size": 3,
16
+ "conv_act": "gelu",
17
+ "layer_norm_eps": 1e-7,
18
+ "max_relative_positions": -1,
19
+ "position_biased_input": false,
20
+ "num_attention_heads": 16,
21
+ "num_hidden_layers": 24,
22
+ "type_vocab_size": 0,
23
+ "vocab_size": 12800,
24
+ "num_labels": 119
25
+ }
Erlangshen-DeBERTa-v2-710M-Chinese/bert4torch_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "deberta_v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "attention_head_size": 64,
5
+ "hidden_act": "gelu",
6
+ "hidden_dropout_prob": 0.1,
7
+ "hidden_size": 1536,
8
+ "initializer_range": 0.02,
9
+ "intermediate_size": 6144,
10
+ "max_position_embeddings": 512,
11
+ "relative_attention": true,
12
+ "position_buckets": 256,
13
+ "norm_rel_ebd": "layer_norm",
14
+ "share_att_key": true,
15
+ "pos_att_type": [
16
+ "p2c",
17
+ "c2p"
18
+ ],
19
+ "conv_kernel_size": 3,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 1536,
23
+ "conv_act": "gelu",
24
+ "layer_norm_eps": 1e-7,
25
+ "max_relative_positions": -1,
26
+ "position_biased_input": false,
27
+ "num_attention_heads": 24,
28
+ "num_hidden_layers": 24,
29
+ "type_vocab_size": 0,
30
+ "num_labels": 119,
31
+ "vocab_size": 12800
32
+ }
Erlangshen-DeBERTa-v2-97M-Chinese/bert4torch_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "deberta_v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 768,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 3072,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "c2p|p2c",
15
+ "conv_kernel_size": 3,
16
+ "conv_act": "gelu",
17
+ "layer_norm_eps": 1e-7,
18
+ "max_relative_positions": -1,
19
+ "position_biased_input": false,
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "type_vocab_size": 0,
23
+ "vocab_size": 12800
24
+ }
chatglm-6b-v0.1.0/bert4torch_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "glm",
3
+ "hidden_act": "gelu_new",
4
+ "bos_token_id": 130004,
5
+ "eos_token_id": 130005,
6
+ "mask_token_id": 130000,
7
+ "gmask_token_id": 130001,
8
+ "pad_token_id": 3,
9
+ "hidden_size": 4096,
10
+ "intermediate_size": 16384,
11
+ "layer_norm_eps": 1e-05,
12
+ "max_sequence_length": 2048,
13
+ "num_attention_heads": 32,
14
+ "num_hidden_layers": 28,
15
+ "position_encoding_2d": true,
16
+ "torch_dtype": "float16",
17
+ "vocab_size": 130528,
18
+ "segment_vocab_size": 0,
19
+ "skip_init": true,
20
+ "rope_rank": "updown",
21
+ "tie_emb_prj_weight": false
22
+ }
chinese-electra-base-discriminator/bert4torch_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "electra",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "directionality": "bidi",
5
+ "embedding_size": 768,
6
+ "hidden_act": "gelu",
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_size": 768,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 3072,
11
+ "layer_norm_eps": 1e-12,
12
+ "max_position_embeddings": 512,
13
+ "num_attention_heads": 12,
14
+ "num_hidden_layers": 12,
15
+ "pad_token_id": 0,
16
+ "type_vocab_size": 2,
17
+ "vocab_size": 21128
18
+ }
ernie-1.0-base-zh/bert4torch_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_probs_dropout_prob": 0.1,
3
+ "hidden_act": "relu",
4
+ "hidden_dropout_prob": 0.1,
5
+ "hidden_size": 768,
6
+ "initializer_range": 0.02,
7
+ "max_position_embeddings": 513,
8
+ "num_attention_heads": 12,
9
+ "num_hidden_layers": 12,
10
+ "type_vocab_size": 2,
11
+ "vocab_size": 18000,
12
+ "pad_token_id": 0,
13
+ "layer_norm_eps": 1e-05,
14
+ "model": "ernie",
15
+ "intermediate_size": 3072
16
+ }
ernie-3.0-base-zh/bert4torch_config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_probs_dropout_prob": 0.1,
3
+ "hidden_act": "gelu",
4
+ "hidden_dropout_prob": 0.1,
5
+ "hidden_size": 768,
6
+ "initializer_range": 0.02,
7
+ "max_position_embeddings": 2048,
8
+ "num_attention_heads": 12,
9
+ "num_hidden_layers": 12,
10
+ "task_type_vocab_size": 3,
11
+ "type_vocab_size": 4,
12
+ "use_task_id": true,
13
+ "vocab_size": 40000,
14
+ "layer_norm_eps": 1e-05,
15
+ "model": "ernie",
16
+ "intermediate_size": 3072
17
+ }
gpt2-ml_15g_corpus/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # gpt2-ml
2
+ - 项目链接(tf版本):https://github.com/imcaspar/gpt2-ml
3
+ - pytorch权重转换和下载:https://github.com/ghosthamlet/gpt2-ml-torch
gpt2-ml_15g_corpus/bert4torch_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "gpt2_ml",
3
+ "segment_vocab_size": 0,
4
+ "vocab_size": 21130,
5
+ "hidden_size": 1536,
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "initializer_range": 0.014142135623731,
10
+ "intermediate_size": 6144,
11
+ "max_position_embeddings": 1024,
12
+ "num_attention_heads": 24,
13
+ "num_hidden_layers": 48
14
+ }
gpt2-ml_30g_corpus/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # gpt2-ml
2
+ - 项目链接(tf版本):https://github.com/imcaspar/gpt2-ml
3
+ - pytorch权重转换和下载:https://github.com/ghosthamlet/gpt2-ml-torch
gpt2-ml_30g_corpus/bert4torch_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "gpt2_ml",
3
+ "segment_vocab_size": 0,
4
+ "vocab_size": 21130,
5
+ "hidden_size": 1536,
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_act": "gelu",
9
+ "initializer_range": 0.014142135623731,
10
+ "intermediate_size": 6144,
11
+ "max_position_embeddings": 1024,
12
+ "num_attention_heads": 24,
13
+ "num_hidden_layers": 48
14
+ }