Tongjilibo committed
Commit a417bc8 · 1 Parent(s): e131816

Update Qwen config
.gitignore CHANGED
@@ -1 +1,2 @@
  config.json
+ generation_config.json
Qwen-14B-Chat/bert4torch_config.json CHANGED
@@ -7,12 +7,10 @@
  "hidden_size": 5120,
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
  "seq_length": 2048,
  "tie_word_embeddings": false,
- "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
  "rope_scaling": {"type": "dynamic_qwen"},
@@ -22,6 +20,6 @@
  "rope_rank": "updown",
  "max_position_embeddings": 8192,
  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
- "skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "skip_special_tokens": true}, "eos_token_id": [151643, 151644, 151645],
  "max_length": 8192}
  }
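The key change in the Qwen-14B configs is the rotary-embedding rename: the old pair "rotary_emb_base" / "rotary_pct" (with rotary_pct fixed at 1.0, i.e. the full head dimension rotated) collapses into the single "rope_theta" key used by newer Qwen configs. A minimal sketch of how such a key is typically consumed, assuming the standard RoPE frequency schedule (not bert4torch's exact internals):

import json
import torch

# Load the config shown in the diff above.
with open("Qwen-14B-Chat/bert4torch_config.json") as f:
    config = json.load(f)

head_dim = config["hidden_size"] // config["num_attention_heads"]  # 5120 // 40 = 128
theta = config.get("rope_theta", 10000)  # falls back to the old default base

# Standard RoPE inverse-frequency schedule: theta^(-2i/d) for i in [0, d/2).
# With the old "rotary_pct": 1.0 the rotation already covered the whole
# head_dim, which is why a single "rope_theta" key can replace both old keys.
inv_freq = 1.0 / (theta ** (torch.arange(0, head_dim, 2).float() / head_dim))
print(inv_freq.shape)  # torch.Size([64])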
Qwen-14B/bert4torch_config.json CHANGED
@@ -7,12 +7,10 @@
  "hidden_size": 5120,
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
  "seq_length": 2048,
  "tie_word_embeddings": false,
- "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
  "rope_scaling": {"type": "dynamic_qwen"},
Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -1,26 +1,18 @@
  {
  "model": "qwen",
  "hidden_act": "silu",
- "bias_dropout_fusion": true,
- "bos_token_id": 151643,
- "embd_pdrop": 0.1,
+ "pad_token_id": 151643,
  "eos_token_id": 151643,
  "intermediate_size": 5504,
  "initializer_range": 0.02,
- "kv_channels": 128,
  "layer_norm_eps": 1e-06,
  "hidden_size": 2048,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
- "n_positions": 6144,
- "resid_pdrop": 0.1,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
- "seq_length": 8192,
  "tie_word_embeddings": false,
- "use_cache": true,
- "use_flash_attn": true,
+ "flash_attention": true,
  "vocab_size": 151936,
  "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
@@ -29,6 +21,6 @@
  "rope_rank": "updown",
  "max_position": 8192,
  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
- "skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "skip_special_tokens": true}, "eos_token_id": [151643, 151644, 151645],
  "max_length": 8192}
  }
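Beyond the RoPE rename, this commit also strips unused legacy keys (dropout rates, kv_channels, n_positions), adds "pad_token_id", replaces "use_flash_attn" with the newer "flash_attention" key, and adds <|endoftext|> (151643) to the generation-time stop tokens. A hedged usage sketch of how these config files are consumed, assuming bert4torch's usual build_transformer_model entry point (the checkpoint path below is illustrative):

from bert4torch.models import build_transformer_model

# config_path points at a file from this repo; checkpoint_path is a
# hypothetical local directory holding the converted Qwen weights.
model = build_transformer_model(
    config_path="Qwen-1_8B-Chat/bert4torch_config.json",
    checkpoint_path="/path/to/Qwen-1_8B-Chat",
)
model.eval()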
Qwen-1_8B/bert4torch_config.json CHANGED
@@ -1,26 +1,18 @@
  {
  "model": "qwen",
  "hidden_act": "silu",
- "bias_dropout_fusion": true,
- "bos_token_id": 151643,
- "embd_pdrop": 0.1,
+ "pad_token_id": 151643,
  "eos_token_id": 151643,
  "intermediate_size": 5504,
  "initializer_range": 0.02,
- "kv_channels": 128,
  "layer_norm_eps": 1e-06,
  "hidden_size": 2048,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
- "n_positions": 6144,
- "resid_pdrop": 0.1,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
- "seq_length": 8192,
  "tie_word_embeddings": false,
- "use_cache": true,
- "use_flash_attn": true,
+ "flash_attention": true,
  "vocab_size": 151936,
  "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -1,25 +1,17 @@
  {
  "model": "qwen",
  "hidden_act": "silu",
- "bias_dropout_fusion": true,
- "bos_token_id": 151643,
- "embd_pdrop": 0.1,
+ "pad_token_id": 151643,
  "eos_token_id": 151643,
  "intermediate_size": 11008,
  "initializer_range": 0.02,
- "kv_channels": 128,
  "layer_norm_eps": 1e-05,
  "hidden_size": 4096,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
- "n_positions": 6144,
- "resid_pdrop": 0.1,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
- "seq_length": 8192,
  "tie_word_embeddings": false,
- "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
  "rope_scaling": {"type": "dynamic_qwen"},
@@ -29,6 +21,6 @@
  "rope_rank": "updown",
  "max_position": 8192,
  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
- "skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "skip_special_tokens": true}, "eos_token_id": [151643, 151644, 151645],
  "max_length": 8192}
  }
Qwen-7B/bert4torch_config.json CHANGED
@@ -1,25 +1,17 @@
  {
  "model": "qwen",
  "hidden_act": "silu",
- "bias_dropout_fusion": true,
- "bos_token_id": 151643,
- "embd_pdrop": 0.1,
+ "pad_token_id": 151643,
  "eos_token_id": 151643,
  "intermediate_size": 11008,
  "initializer_range": 0.02,
- "kv_channels": 128,
  "layer_norm_eps": 1e-05,
  "hidden_size": 4096,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
- "n_positions": 6144,
- "resid_pdrop": 0.1,
- "rotary_emb_base": 10000,
- "rotary_pct": 1.0,
+ "rope_theta": 10000,
  "scale_attn_weights": true,
- "seq_length": 8192,
  "tie_word_embeddings": false,
- "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
  "rope_scaling": {"type": "dynamic_qwen"},
Qwen1.5-0.5B-Chat/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 21,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen1.5-1.8B-Chat/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 21,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen1.5-14B-Chat/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 35,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen1.5-7B-Chat/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 28,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen2-0.5B-Instruct/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 24,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen2-1.5B-Instruct/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 32768,
  "max_window_layers": 28,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
Qwen2-7B-Instruct/bert4torch_config.json CHANGED
@@ -24,6 +24,6 @@
  "sliding_window": 131072,
  "max_window_layers": 28,
  "convert_lm_logits_dtype": "float32",
- "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151644, 151645],
+ "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "eos_token_id": [151643, 151645],
  "max_length": 32768}
  }
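Every Qwen1.5/Qwen2 chat config above gets the same one-line fix: <|im_start|> (151644) is dropped from "eos_token_id" in favor of <|endoftext|> (151643), so generation halts on end-of-text or <|im_end|> (151645) rather than on the start-of-turn marker. A generic sketch (not bert4torch's internals) of why this field is a list:

# Stop token ids per the updated Qwen1.5/Qwen2 generation_config above:
# 151643 = <|endoftext|>, 151645 = <|im_end|>.
EOS_IDS = {151643, 151645}

def generate(next_token_fn, max_length=32768):
    """Decode loop: next_token_fn() yields the next token id;
    decoding halts as soon as any configured EOS id appears."""
    tokens = []
    for _ in range(max_length):
        tok = next_token_fn()
        if tok in EOS_IDS:
            break
        tokens.append(tok)
    return tokens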