Tongjilibo committed
Commit 12edfac
Parent: 4db87e1

Adjust Qwen's RoPE scaling

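The only change in each file is the `rope_scaling` type: `"dynamic"` becomes `"dynamic_qwen"`. Presumably this separates Qwen's dynamic NTK variant, which rounds the scaling factor (`ntk_alpha`) up in discrete steps, from the generic dynamic NTK that rescales the RoPE base continuously with sequence length. A minimal sketch of the difference, modeled on the `ntk_alpha` formula in Qwen's `modeling_qwen.py`; the constants (training length 2048, head dim 128) and function names are illustrative, not bert4torch's internals:

```python
import math

TRAIN_LEN = 2048      # Qwen's training context length (assumption)
ROPE_BASE = 10000.0   # default RoPE base
HEAD_DIM = 128        # per-head dimension for Qwen-7B/14B (assumption)

def generic_dynamic_base(seq_len: int) -> float:
    # Generic "dynamic" NTK: rescale the base continuously once the
    # sequence exceeds the training length.
    if seq_len <= TRAIN_LEN:
        return ROPE_BASE
    factor = seq_len / TRAIN_LEN
    return ROPE_BASE * factor ** (HEAD_DIM / (HEAD_DIM - 2))

def qwen_dynamic_base(seq_len: int) -> float:
    # Qwen-style "dynamic_qwen" NTK: round ntk_alpha up to a power of
    # two minus one, so the base changes in discrete steps and the
    # resulting rotary tables can be cached per step.
    context_value = math.log(seq_len / TRAIN_LEN, 2) + 1
    ntk_alpha = max(2 ** math.ceil(context_value) - 1, 1)
    return ROPE_BASE * ntk_alpha ** (HEAD_DIM / (HEAD_DIM - 2))

for n in (2048, 4096, 8192, 16384):
    print(n, round(generic_dynamic_base(n)), round(qwen_dynamic_base(n)))
```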
Qwen-14B-Chat/bert4torch_config.json CHANGED
@@ -15,7 +15,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-14B/bert4torch_config.json CHANGED
@@ -15,7 +15,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "use_flash_attn": true,
  "vocab_size": 151936,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-1_8B/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "use_flash_attn": true,
  "vocab_size": 151936,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-7B/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
- "rope_scaling": {"type": "dynamic"},
+ "rope_scaling": {"type": "dynamic_qwen"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,