Tongjilibo committed
Commit d73ebe7
1 Parent(s): 08601ed

Add qwen14b

Qwen-14B-Chat/bert4torch_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "model": "qwen",
+ "hidden_act": "silu",
+ "intermediate_size": 13696,
+ "initializer_range": 0.02,
+ "layer_norm_eps": 1e-06,
+ "hidden_size": 5120,
+ "num_attention_heads": 40,
+ "num_hidden_layers": 40,
+ "rotary_emb_base": 10000,
+ "rotary_pct": 1.0,
+ "scale_attn_weights": true,
+ "seq_length": 2048,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "flash_attention": "flash_attn_2",
+ "vocab_size": 152064,
+ "use_dynamic_ntk": true,
+ "use_logn_attn": true,
+ "segment_vocab_size": 0,
+ "skip_init": true,
+ "rope_rank": "updown",
+ "max_position_embeddings": 8192
+ }
Qwen-14B/bert4torch_config.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "model": "qwen",
+ "hidden_act": "silu",
+ "intermediate_size": 13696,
+ "initializer_range": 0.02,
+ "layer_norm_eps": 1e-06,
+ "hidden_size": 5120,
+ "num_attention_heads": 40,
+ "num_hidden_layers": 40,
+ "rotary_emb_base": 10000,
+ "rotary_pct": 1.0,
+ "scale_attn_weights": true,
+ "seq_length": 2048,
+ "tie_word_embeddings": false,
+ "use_cache": true,
+ "flash_attention": "flash_attn_2",
+ "vocab_size": 152064,
+ "use_dynamic_ntk": true,
+ "use_logn_attn": true,
+ "segment_vocab_size": 0,
+ "skip_init": true,
+ "rope_rank": "updown",
+ "max_position_embeddings": 8192
+ }
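
For context, a bert4torch_config.json like the ones added here is consumed by bert4torch's build_transformer_model, which reads the "model": "qwen" field to select the architecture. Below is a minimal sketch of loading Qwen-14B-Chat with this config; the checkpoint path is a hypothetical placeholder and assumes the weights have already been converted to a bert4torch-compatible format.

from bert4torch.models import build_transformer_model

# Illustrative paths: bert4torch_config.json is the file added in this commit;
# the checkpoint is assumed to be a bert4torch-compatible conversion of the HF weights.
config_path = "Qwen-14B-Chat/bert4torch_config.json"
checkpoint_path = "Qwen-14B-Chat/pytorch_model.bin"  # hypothetical converted weights

# Build the Qwen model from the config and load the converted weights.
model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)
model.eval()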