Tongjilibo
/

bert4torch_config

Tongjilibo committed on Jul 25

Commit

12edfac

•

1 Parent(s): 4db87e1

调整qwen的rope

Files changed (6) hide show

Qwen-14B-Chat/bert4torch_config.json CHANGED Viewed

@@ -15,7 +15,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-14B/bert4torch_config.json CHANGED Viewed

@@ -15,7 +15,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-1_8B-Chat/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-1_8B/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-7B-Chat/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-7B/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
-    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic_qwen"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,