Tongjilibo committed
Commit 16f61c2
1 Parent(s): af50f21
Add generation_config for large models
Files changed (each file adds a generation_config block; see the reading sketch after this list):
- BELLE-LLaMA-7B-2M-enc/bert4torch_config.json +2 -1
- Baichuan-13B-Base/bert4torch_config.json +2 -1
- Baichuan-13B-Chat/bert4torch_config.json +2 -1
- Baichuan-7B/bert4torch_config.json +2 -1
- Baichuan2-13B-Base/bert4torch_config.json +2 -1
- Baichuan2-13B-Chat/bert4torch_config.json +2 -1
- Baichuan2-7B-Base/bert4torch_config.json +2 -1
- Baichuan2-7B-Chat/bert4torch_config.json +2 -1
- Llama-2-13b-chat-hf/bert4torch_config.json +2 -1
- Llama-2-13b-hf/bert4torch_config.json +2 -1
- Llama-2-7b-chat-hf/bert4torch_config.json +2 -1
- Llama-2-7b-hf/bert4torch_config.json +2 -1
- Meta-Llama-3-8B-Instruct/bert4torch_config.json +2 -1
- Meta-Llama-3-8B/bert4torch_config.json +2 -1
- Qwen-14B-Chat/bert4torch_config.json +4 -1
- Qwen-14B/bert4torch_config.json +4 -1
- Qwen-1_8B-Chat/bert4torch_config.json +4 -1
- Qwen-1_8B/bert4torch_config.json +4 -1
- Qwen-7B-Chat/bert4torch_config.json +5 -2
- Qwen-7B/bert4torch_config.json +5 -2
- Yi-6B-200K/bert4torch_config.json +1 -0
- Yi-6B/bert4torch_config.json +1 -0
- Ziya-LLaMA-13B-v1.1/bert4torch_config.json +2 -1
- Ziya-LLaMA-13B-v1/bert4torch_config.json +2 -1
- chinese_alpaca_plus_7b/bert4torch_config.json +2 -1
- chinese_llama_plus_7b/bert4torch_config.json +2 -1
- deepseek-moe-16b-base/bert4torch_config.json +2 -1
- falcon-7b-instruct/bert4torch_config.json +1 -1
- falcon-7b/bert4torch_config.json +1 -1
- internlm-7b/bert4torch_config.json +2 -1
- internlm-chat-7b/bert4torch_config.json +1 -1
- llama-13b/bert4torch_config.json +2 -1
- llama-7b/bert4torch_config.json +2 -1
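Every config gains the same kind of addition: a "generation_config" entry that stores default decoding settings (tokenizer handling, max_length, and the end-of-sequence end_id) next to the existing model hyperparameters; the per-file diffs follow. As a minimal sketch, assuming only the JSON layout shown in those diffs (the local path is hypothetical), the new block can be read back with the Python standard library:

    import json

    # Hypothetical local copy of one of the updated configs.
    with open("llama-7b/bert4torch_config.json", encoding="utf-8") as f:
        config = json.load(f)

    # Added in this commit: decoding defaults alongside the model settings.
    gen_cfg = config.get("generation_config", {})
    print(gen_cfg.get("max_length"))        # e.g. 2048
    print(gen_cfg.get("end_id"))            # e.g. 2, or a list for Qwen / Llama-3
    print(gen_cfg.get("tokenizer_config"))  # e.g. {'skip_special_tokens': True}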
BELLE-LLaMA-7B-2M-enc/bert4torch_config.json
CHANGED
@@ -9,5 +9,6 @@
     "vocab_size": 32000,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
Baichuan-13B-Base/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "rope_rank": "updown",
     "p_bias": "alibi",
-    "skip_init": true
+    "skip_init": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan-13B-Chat/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "rope_rank": "updown",
     "p_bias": "alibi",
-    "skip_init": true
+    "skip_init": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan-7B/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
     "vocab_size": 64000,
     "segment_vocab_size": 0,
     "rope_rank": "updown",
-    "skip_init": true
+    "skip_init": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-13B-Base/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "p_bias": "alibi",
     "skip_init": true,
-    "norm_head": true
+    "norm_head": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-13B-Chat/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "p_bias": "alibi",
     "skip_init": true,
-    "norm_head": true
+    "norm_head": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-7B-Base/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "rope_rank": "updown",
     "skip_init": true,
-    "norm_head": true
+    "norm_head": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-7B-Chat/bert4torch_config.json
CHANGED
@@ -17,5 +17,6 @@
     "segment_vocab_size": 0,
     "rope_rank": "updown",
     "skip_init": true,
-    "norm_head": true
+    "norm_head": true,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Llama-2-13b-chat-hf/bert4torch_config.json
CHANGED
@@ -15,5 +15,6 @@
     "layer_norm_eps": 1e-5,
     "rope_rank": "updown",
     "tie_word_embeddings": false,
-    "torch_dtype": "float16"
+    "torch_dtype": "float16",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Llama-2-13b-hf/bert4torch_config.json
CHANGED
@@ -15,5 +15,6 @@
     "layer_norm_eps": 1e-5,
     "rope_rank": "updown",
     "tie_word_embeddings": false,
-    "torch_dtype": "float16"
+    "torch_dtype": "float16",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Llama-2-7b-chat-hf/bert4torch_config.json
CHANGED
@@ -9,5 +9,6 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "layer_norm_eps": 1e-6,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Llama-2-7b-hf/bert4torch_config.json
CHANGED
@@ -9,5 +9,6 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "layer_norm_eps": 1e-5,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Meta-Llama-3-8B-Instruct/bert4torch_config.json
CHANGED
@@ -15,5 +15,6 @@
     "max_position_embeddings": 8192,
     "torch_dtype": "bfloat16",
     "tie_word_embeddings": false,
-    "attention_probs_dropout_prob": 0.0
+    "attention_probs_dropout_prob": 0.0,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
Meta-Llama-3-8B/bert4torch_config.json
CHANGED
@@ -15,5 +15,6 @@
     "max_position_embeddings": 8192,
     "torch_dtype": "bfloat16",
     "tie_word_embeddings": false,
-    "attention_probs_dropout_prob": 0.0
+    "attention_probs_dropout_prob": 0.0,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
Qwen-14B-Chat/bert4torch_config.json
CHANGED
@@ -20,5 +20,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position_embeddings": 8192
+    "max_position_embeddings": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151644, 151645],
+                          "max_length": 8192}
 }
Qwen-14B/bert4torch_config.json
CHANGED
@@ -20,5 +20,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position_embeddings": 8192
+    "max_position_embeddings": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151643],
+                          "max_length": 8192}
 }
Qwen-1_8B-Chat/bert4torch_config.json
CHANGED
@@ -27,5 +27,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position": 8192
+    "max_position": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151644, 151645],
+                          "max_length": 8192}
 }
Qwen-1_8B/bert4torch_config.json
CHANGED
@@ -27,5 +27,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position": 8192
+    "max_position": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151643],
+                          "max_length": 8192}
 }
Qwen-7B-Chat/bert4torch_config.json
CHANGED
@@ -17,7 +17,7 @@
     "rotary_emb_base": 10000,
     "rotary_pct": 1.0,
     "scale_attn_weights": true,
-    "seq_length":
+    "seq_length": 8192,
     "tie_word_embeddings": false,
     "use_cache": true,
     "flash_attention": "flash_attn_2",
@@ -27,5 +27,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position":
+    "max_position": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151644, 151645],
+                          "max_length": 8192}
 }
Qwen-7B/bert4torch_config.json
CHANGED
@@ -17,7 +17,7 @@
     "rotary_emb_base": 10000,
     "rotary_pct": 1.0,
     "scale_attn_weights": true,
-    "seq_length":
+    "seq_length": 8192,
     "tie_word_embeddings": false,
     "use_cache": true,
     "flash_attention": "flash_attn_2",
@@ -27,5 +27,8 @@
     "segment_vocab_size": 0,
     "skip_init": true,
     "rope_rank": "updown",
-    "max_position":
+    "max_position": 8192,
+    "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+                          "skip_special_tokens": true}, "end_id": [151643],
+                          "max_length": 8192}
 }
Yi-6B-200K/bert4torch_config.json
CHANGED
@@ -19,6 +19,7 @@
     "skip_init": true,
     "rope_rank": "updown",
     "segment_vocab_size": 0,
+    "generation_config": {"tokenizer_decode_config": {"skip_special_tokens": true}, "max_length": 4096, "end_id": 2},
     "mapping": {
         "embeddings.word_embeddings.weight": "model.embed_tokens.weight",
         "LayerNormFinal.weight": "model.norm.weight",
Yi-6B/bert4torch_config.json
CHANGED
@@ -19,6 +19,7 @@
     "skip_init": true,
     "rope_rank": "updown",
     "segment_vocab_size": 0,
+    "generation_config": {"tokenizer_decode_config": {"skip_special_tokens": true}, "max_length": 4096, "end_id": 2},
     "mapping": {
         "embeddings.word_embeddings.weight": "model.embed_tokens.weight",
         "LayerNormFinal.weight": "model.norm.weight",
Ziya-LLaMA-13B-v1.1/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
     "vocab_size": 39424,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
Ziya-LLaMA-13B-v1/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
     "vocab_size": 39424,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
chinese_alpaca_plus_7b/bert4torch_config.json
CHANGED
@@ -9,5 +9,6 @@
     "vocab_size": 49954,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
chinese_llama_plus_7b/bert4torch_config.json
CHANGED
@@ -9,5 +9,6 @@
     "vocab_size": 49953,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
deepseek-moe-16b-base/bert4torch_config.json
CHANGED
@@ -31,5 +31,6 @@
     "vocab_size": 102400,
     "skip_init": true,
     "segment_vocab_size": 0,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"add_special_tokens": false, "skip_special_tokens": true}, "max_length": 4096, "end_id": 100001}
 }
falcon-7b-instruct/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
     "hidden_dropout": 0.0,
     "hidden_size": 4544,
     "initializer_range": 0.02,
-    "intermediate_size":
+    "intermediate_size": 18176,
     "hidden_act": "gelu",
     "layer_norm_eps": 1e-05,
     "multi_query": true,
falcon-7b/bert4torch_config.json
CHANGED
@@ -11,7 +11,7 @@
     "hidden_dropout": 0.0,
     "hidden_size": 4544,
     "initializer_range": 0.02,
-    "intermediate_size":
+    "intermediate_size": 18176,
    "hidden_act": "gelu",
     "layer_norm_eps": 1e-05,
     "multi_query": true,
internlm-7b/bert4torch_config.json
CHANGED
@@ -12,5 +12,6 @@
     "skip_init": true,
     "rope_rank": "updown",
     "torch_dtype": "float16",
-    "tie_word_embeddings": false
+    "tie_word_embeddings": false,
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048}
 }
internlm-chat-7b/bert4torch_config.json
CHANGED
@@ -13,5 +13,5 @@
     "rope_rank": "updown",
     "torch_dtype": "float16",
     "tie_word_embeddings": false,
-    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [2, 103028]}
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [2, 103028], "max_length": 2048}
 }
llama-13b/bert4torch_config.json
CHANGED
@@ -11,5 +11,6 @@
     "vocab_size": 32000,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
llama-7b/bert4torch_config.json
CHANGED
@@ -11,5 +11,6 @@
     "vocab_size": 32000,
     "segment_vocab_size": 0,
     "skip_init": true,
-    "rope_rank": "updown"
+    "rope_rank": "updown",
+    "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
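Note that end_id appears in two forms across these files: a single integer (2 for the LLaMA/Yi/Ziya configs, 100001 for deepseek-moe) and a list ([151643] or [151644, 151645] for Qwen, [128001, 128009] for Llama-3). A minimal sketch of normalizing both forms on the consumer side, assuming nothing beyond the JSON shown above (the helper name is illustrative, not a bert4torch API):

    from typing import List, Union

    def normalize_end_ids(end_id: Union[int, List[int], None]) -> List[int]:
        # Return end-of-sequence token ids as a list, whichever form the config uses.
        if end_id is None:
            return []
        if isinstance(end_id, int):
            return [end_id]
        return list(end_id)

    assert normalize_end_ids(2) == [2]                              # LLaMA / Yi style
    assert normalize_end_ids([151644, 151645]) == [151644, 151645]  # Qwen-Chat style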