Tongjilibo committed on
Commit c561284
1 Parent(s): 1178c10

Rename parameters

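For context: this commit collapses the two RoPE-extension switches previously used in these configs (the boolean "use_dynamic_ntk" and the scalar "rope_scaling_factor") into a single "rope_scaling" object of the form {"type": ..., "factor": ...}, matching the convention used by Hugging Face transformers configs. A minimal migration sketch follows; the helper name migrate_rope_config is hypothetical and not part of bert4torch:

import json

def migrate_rope_config(cfg: dict) -> dict:
    # Hypothetical helper: maps the old keys onto the unified
    # "rope_scaling" dict introduced by this commit.
    cfg = dict(cfg)  # shallow copy; leave the input untouched
    if cfg.pop("use_dynamic_ntk", False):
        # old boolean flag -> dynamic (NTK-aware) scaling object
        cfg["rope_scaling"] = {"type": "dynamic"}
    factor = cfg.pop("rope_scaling_factor", None)
    if factor is not None:
        # old scalar factor -> linear (position-interpolation) scaling object
        cfg["rope_scaling"] = {"type": "linear", "factor": factor}
    return cfg

with open("bert4torch_config.json") as f:
    new_cfg = migrate_rope_config(json.load(f))
print(json.dumps(new_cfg, indent=2))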
Qwen-14B-Chat/bert4torch_config.json CHANGED
@@ -15,7 +15,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-14B/bert4torch_config.json CHANGED
@@ -15,7 +15,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 152064,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "use_flash_attn": true,
  "vocab_size": 151936,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-1_8B/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "use_flash_attn": true,
  "vocab_size": 151936,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
Qwen-7B/bert4torch_config.json CHANGED
@@ -22,7 +22,7 @@
  "use_cache": true,
  "flash_attention": "flash_attn_2",
  "vocab_size": 151936,
- "use_dynamic_ntk": true,
+ "rope_scaling": {"type": "dynamic"},
  "use_logn_attn": true,
  "segment_vocab_size": 0,
  "skip_init": true,
chatglm2-6b-32k/bert4torch_config.json CHANGED
@@ -16,7 +16,10 @@
  "pad_token_id": 2,
  "rmsnorm": true,
  "rope_rank": "adjacent",
- "rope_scaling_factor": 16,
+ "rope_scaling": {
+ "type": "linear",
+ "factor": 16
+ },
  "position_encoding_2d": true,
  "flash_attention": true,
  "generation_config": {"max_length": 32768}