Tongjilibo committed
Commit 9b6343b
1 Parent(s): 8c4f939
nezha-base-wwm/bert4torch_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 2,
+   "classifier_dropout": 0.1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "inner_group_num": 1,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "max_relative_position": 64,
+   "model": "nezha",
+   "num_attention_heads": 12,
+   "num_hidden_groups": 1,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "torch_dtype": "float32",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "use_relative_position": true,
+   "vocab_size": 21128
+ }
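For context, configs like the one above are what bert4torch reads when constructing a model: "model": "nezha" selects the NEZHA architecture (relative position encodings, capped by max_relative_position=64). A minimal loading sketch follows; build_transformer_model is bert4torch's standard entry point, but the checkpoint filename here is a placeholder and should be adjusted to the local layout.

# Minimal sketch: building a NEZHA model from the config added in this commit.
# Paths are placeholders, not files guaranteed by this repo.
from bert4torch.models import build_transformer_model

config_path = 'nezha-base-wwm/bert4torch_config.json'  # added in this commit
checkpoint_path = 'nezha-base-wwm/pytorch_model.bin'   # assumed local weights file

# The config's "model": "nezha" key tells bert4torch which architecture to build.
model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
)
model.eval()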
nezha-cn-base/bert4torch_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "max_position_embeddings": 512,
+   "max_relative_position": 64,
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "type_vocab_size": 2,
+   "vocab_size": 21128,
+   "use_relative_position": true,
+   "model": "nezha"
+ }
nezha-cn-large/bert4torch_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 2,
+   "classifier_dropout": 0.1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "max_relative_position": 64,
+   "model": "nezha",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "torch_dtype": "float32",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "use_relative_position": true,
+   "vocab_size": 21128
+ }
nezha-large-wwm/bert4torch_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 2,
+   "classifier_dropout": 0.1,
+   "embedding_size": 128,
+   "eos_token_id": 3,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "max_relative_position": 64,
+   "model": "nezha",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "torch_dtype": "float32",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "use_relative_position": true,
+   "vocab_size": 21128
+ }
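Across all four configs the per-head dimension works out consistently: hidden_size divides evenly by num_attention_heads (768/12 = 64 for the base variants, 1024/16 = 64 for the large ones). A stdlib-only sanity-check sketch over the files added in this commit (the paths assume the repo's directory layout):

import json

# Paths mirroring the four config files added in this commit.
paths = [
    'nezha-base-wwm/bert4torch_config.json',
    'nezha-cn-base/bert4torch_config.json',
    'nezha-cn-large/bert4torch_config.json',
    'nezha-large-wwm/bert4torch_config.json',
]

for path in paths:
    with open(path) as f:
        cfg = json.load(f)
    assert cfg['model'] == 'nezha'
    # Per-head dimension must be an integer: 768/12 = 64, 1024/16 = 64.
    assert cfg['hidden_size'] % cfg['num_attention_heads'] == 0
    print(path, 'ok: head_dim =', cfg['hidden_size'] // cfg['num_attention_heads'])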