Tongjilibo
/

bert4torch_config

Tongjilibo committed on Jul 24

Commit

c561284

•

1 Parent(s): 1178c10

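修改参数名 (Rename parameters: `use_dynamic_ntk` and `rope_scaling_factor` are replaced by the unified `rope_scaling` setting)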

Files changed (7) hide show

Qwen-14B-Chat/bert4torch_config.json CHANGED Viewed

@@ -15,7 +15,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-14B/bert4torch_config.json CHANGED Viewed

@@ -15,7 +15,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 152064,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-1_8B-Chat/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-1_8B/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "use_flash_attn": true,
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-7B-Chat/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

Qwen-7B/bert4torch_config.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
-    "use_dynamic_ntk": true,
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

     "use_cache": true,
     "flash_attention": "flash_attn_2",
     "vocab_size": 151936,
+    "rope_scaling": {"type": "dynamic"},
     "use_logn_attn": true,
     "segment_vocab_size": 0,
     "skip_init": true,

chatglm2-6b-32k/bert4torch_config.json CHANGED Viewed

@@ -16,7 +16,10 @@
   "pad_token_id": 2,
   "rmsnorm": true,
   "rope_rank": "adjacent",
-  "rope_scaling_factor": 16,
   "position_encoding_2d": true,
   "flash_attention": true,
   "generation_config": {"max_length": 32768}

   "pad_token_id": 2,
   "rmsnorm": true,
   "rope_rank": "adjacent",
+  "rope_scaling": {
+    "type": "linear",
+    "factor": 16
+  },
   "position_encoding_2d": true,
   "flash_attention": true,
   "generation_config": {"max_length": 32768}