Tongjilibo committed
Commit: 16f61c2
Parent: af50f21

Add generation_config for large language models

Files changed (33):
  1. BELLE-LLaMA-7B-2M-enc/bert4torch_config.json +2 -1
  2. Baichuan-13B-Base/bert4torch_config.json +2 -1
  3. Baichuan-13B-Chat/bert4torch_config.json +2 -1
  4. Baichuan-7B/bert4torch_config.json +2 -1
  5. Baichuan2-13B-Base/bert4torch_config.json +2 -1
  6. Baichuan2-13B-Chat/bert4torch_config.json +2 -1
  7. Baichuan2-7B-Base/bert4torch_config.json +2 -1
  8. Baichuan2-7B-Chat/bert4torch_config.json +2 -1
  9. Llama-2-13b-chat-hf/bert4torch_config.json +2 -1
  10. Llama-2-13b-hf/bert4torch_config.json +2 -1
  11. Llama-2-7b-chat-hf/bert4torch_config.json +2 -1
  12. Llama-2-7b-hf/bert4torch_config.json +2 -1
  13. Meta-Llama-3-8B-Instruct/bert4torch_config.json +2 -1
  14. Meta-Llama-3-8B/bert4torch_config.json +2 -1
  15. Qwen-14B-Chat/bert4torch_config.json +4 -1
  16. Qwen-14B/bert4torch_config.json +4 -1
  17. Qwen-1_8B-Chat/bert4torch_config.json +4 -1
  18. Qwen-1_8B/bert4torch_config.json +4 -1
  19. Qwen-7B-Chat/bert4torch_config.json +5 -2
  20. Qwen-7B/bert4torch_config.json +5 -2
  21. Yi-6B-200K/bert4torch_config.json +1 -0
  22. Yi-6B/bert4torch_config.json +1 -0
  23. Ziya-LLaMA-13B-v1.1/bert4torch_config.json +2 -1
  24. Ziya-LLaMA-13B-v1/bert4torch_config.json +2 -1
  25. chinese_alpaca_plus_7b/bert4torch_config.json +2 -1
  26. chinese_llama_plus_7b/bert4torch_config.json +2 -1
  27. deepseek-moe-16b-base/bert4torch_config.json +2 -1
  28. falcon-7b-instruct/bert4torch_config.json +1 -1
  29. falcon-7b/bert4torch_config.json +1 -1
  30. internlm-7b/bert4torch_config.json +2 -1
  31. internlm-chat-7b/bert4torch_config.json +1 -1
  32. llama-13b/bert4torch_config.json +2 -1
  33. llama-7b/bert4torch_config.json +2 -1
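
Each of the 33 configs gains a generation_config block bundling tokenizer options (e.g. skip_special_tokens), stop-token ids (end_id) and, in most cases, a default max_length, so callers no longer have to pass these at generation time. A minimal sketch of how a loader might consume the block (the load_generation_config helper and the 2048 fallback are illustrative assumptions, not bert4torch's actual API):

    import json
    from pathlib import Path

    def load_generation_config(config_dir: str) -> dict:
        # Illustrative helper (not bert4torch's real API): pull the
        # generation_config block out of bert4torch_config.json.
        cfg = json.loads(Path(config_dir, "bert4torch_config.json").read_text())
        gen = cfg.get("generation_config", {})
        # Normalize end_id to a list: the configs below store either an
        # int (2) or a list ([151644, 151645]); one shape keeps the stop
        # check simple.
        end_id = gen.get("end_id")
        if isinstance(end_id, int):
            gen["end_id"] = [end_id]
        gen.setdefault("max_length", 2048)  # hypothetical fallback, not from the diff
        return gen

    # e.g. {'tokenizer_config': {'skip_special_tokens': True}, 'max_length': 2048, 'end_id': [2]}
    print(load_generation_config("llama-7b"))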
BELLE-LLaMA-7B-2M-enc/bert4torch_config.json CHANGED
@@ -9,5 +9,6 @@
   "vocab_size": 32000,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
Baichuan-13B-Base/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "rope_rank": "updown",
   "p_bias": "alibi",
-  "skip_init": true
+  "skip_init": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan-13B-Chat/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "rope_rank": "updown",
   "p_bias": "alibi",
-  "skip_init": true
+  "skip_init": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan-7B/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
   "vocab_size": 64000,
   "segment_vocab_size": 0,
   "rope_rank": "updown",
-  "skip_init": true
+  "skip_init": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-13B-Base/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "p_bias": "alibi",
   "skip_init": true,
-  "norm_head": true
+  "norm_head": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-13B-Chat/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "p_bias": "alibi",
   "skip_init": true,
-  "norm_head": true
+  "norm_head": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-7B-Base/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "rope_rank": "updown",
   "skip_init": true,
-  "norm_head": true
+  "norm_head": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Baichuan2-7B-Chat/bert4torch_config.json CHANGED
@@ -17,5 +17,6 @@
   "segment_vocab_size": 0,
   "rope_rank": "updown",
   "skip_init": true,
-  "norm_head": true
+  "norm_head": true,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 4096}
 }
Llama-2-13b-chat-hf/bert4torch_config.json CHANGED
@@ -15,5 +15,6 @@
   "layer_norm_eps": 1e-5,
   "rope_rank": "updown",
   "tie_word_embeddings": false,
-  "torch_dtype": "float16"
+  "torch_dtype": "float16",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": 2}
 }
Llama-2-13b-hf/bert4torch_config.json CHANGED
@@ -15,5 +15,6 @@
   "layer_norm_eps": 1e-5,
   "rope_rank": "updown",
   "tie_word_embeddings": false,
-  "torch_dtype": "float16"
+  "torch_dtype": "float16",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": 2}
 }
Llama-2-7b-chat-hf/bert4torch_config.json CHANGED
@@ -9,5 +9,6 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "layer_norm_eps": 1e-6,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": 2}
 }
Llama-2-7b-hf/bert4torch_config.json CHANGED
@@ -9,5 +9,6 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "layer_norm_eps": 1e-5,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": 2}
 }
Meta-Llama-3-8B-Instruct/bert4torch_config.json CHANGED
@@ -15,5 +15,6 @@
   "max_position_embeddings": 8192,
   "torch_dtype": "bfloat16",
   "tie_word_embeddings": false,
-  "attention_probs_dropout_prob": 0.0
+  "attention_probs_dropout_prob": 0.0,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
Meta-Llama-3-8B/bert4torch_config.json CHANGED
@@ -15,5 +15,6 @@
   "max_position_embeddings": 8192,
   "torch_dtype": "bfloat16",
   "tie_word_embeddings": false,
-  "attention_probs_dropout_prob": 0.0
+  "attention_probs_dropout_prob": 0.0,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
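
The Llama-3 entries use a list-valued end_id: the Llama-3 tokenizer defines both <|end_of_text|> (128001) and <|eot_id|> (128009) as terminators, and the instruct checkpoints end each turn with <|eot_id|>, so decoding should stop on whichever id arrives first. A minimal stop check, reusing the list-normalization assumption from the sketch above:

    # Ids as defined by the Llama-3 tokenizer:
    # <|end_of_text|> = 128001, <|eot_id|> = 128009.
    LLAMA3_END_IDS = {128001, 128009}

    def should_stop(token_id: int) -> bool:
        # Stop as soon as the model emits any configured end token.
        return token_id in LLAMA3_END_IDS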
Qwen-14B-Chat/bert4torch_config.json CHANGED
@@ -20,5 +20,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position_embeddings": 8192
+  "max_position_embeddings": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151644, 151645],
+  "max_length": 8192}
 }
Qwen-14B/bert4torch_config.json CHANGED
@@ -20,5 +20,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position_embeddings": 8192
+  "max_position_embeddings": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151643],
+  "max_length": 8192}
 }
Qwen-1_8B-Chat/bert4torch_config.json CHANGED
@@ -27,5 +27,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position": 8192
+  "max_position": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151644, 151645],
+  "max_length": 8192}
 }
Qwen-1_8B/bert4torch_config.json CHANGED
@@ -27,5 +27,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position": 8192
+  "max_position": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151643],
+  "max_length": 8192}
 }
Qwen-7B-Chat/bert4torch_config.json CHANGED
@@ -17,7 +17,7 @@
   "rotary_emb_base": 10000,
   "rotary_pct": 1.0,
   "scale_attn_weights": true,
-  "seq_length": 2048,
+  "seq_length": 8192,
   "tie_word_embeddings": false,
   "use_cache": true,
   "flash_attention": "flash_attn_2",
@@ -27,5 +27,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position": 2048
+  "max_position": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151644, 151645],
+  "max_length": 8192}
 }
Qwen-7B/bert4torch_config.json CHANGED
@@ -17,7 +17,7 @@
   "rotary_emb_base": 10000,
   "rotary_pct": 1.0,
   "scale_attn_weights": true,
-  "seq_length": 2048,
+  "seq_length": 8192,
   "tie_word_embeddings": false,
   "use_cache": true,
   "flash_attention": "flash_attn_2",
@@ -27,5 +27,8 @@
   "segment_vocab_size": 0,
   "skip_init": true,
   "rope_rank": "updown",
-  "max_position": 2048
+  "max_position": 8192,
+  "generation_config": {"tokenizer_config": {"allowed_special": ["<|im_end|>", "<|im_start|>", "<|endoftext|>"],
+  "skip_special_tokens": true}, "end_id": [151643],
+  "max_length": 8192}
 }
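
The Qwen entries carry two changes beyond generation_config. First, Qwen's tiktoken-based tokenizer rejects special tokens found in input text unless they are whitelisted, hence allowed_special; the ids line up with Qwen's vocabulary (<|endoftext|> = 151643, <|im_start|> = 151644, <|im_end|> = 151645), so the chat models stop on the ChatML markers and the base models on <|endoftext|>. Second, Qwen-7B's seq_length and max_position move from 2048 to 8192, which appears to track the re-released 8K-context Qwen-7B weights. A toy re-implementation of the whitelist guard (illustrative, not Qwen's actual tokenizer code):

    QWEN_SPECIAL = {"<|endoftext|>": 151643, "<|im_start|>": 151644, "<|im_end|>": 151645}

    def check_specials(text: str, allowed_special: set) -> None:
        # tiktoken raises ValueError when a special token appears in the
        # input text without being explicitly allowed.
        for tok in QWEN_SPECIAL:
            if tok in text and tok not in allowed_special:
                raise ValueError(f"special token {tok!r} not in allowed_special")

    check_specials("<|im_start|>user\nhi<|im_end|>",
                   allowed_special={"<|im_end|>", "<|im_start|>", "<|endoftext|>"})  # passes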
Yi-6B-200K/bert4torch_config.json CHANGED
@@ -19,6 +19,7 @@
   "skip_init": true,
   "rope_rank": "updown",
   "segment_vocab_size": 0,
+  "generation_config": {"tokenizer_decode_config": {"skip_special_tokens": true}, "max_length": 4096, "end_id": 2},
   "mapping": {
     "embeddings.word_embeddings.weight": "model.embed_tokens.weight",
     "LayerNormFinal.weight": "model.norm.weight",
Yi-6B/bert4torch_config.json CHANGED
@@ -19,6 +19,7 @@
   "skip_init": true,
   "rope_rank": "updown",
   "segment_vocab_size": 0,
+  "generation_config": {"tokenizer_decode_config": {"skip_special_tokens": true}, "max_length": 4096, "end_id": 2},
   "mapping": {
     "embeddings.word_embeddings.weight": "model.embed_tokens.weight",
     "LayerNormFinal.weight": "model.norm.weight",
Ziya-LLaMA-13B-v1.1/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
   "vocab_size": 39424,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
Ziya-LLaMA-13B-v1/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
   "vocab_size": 39424,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
chinese_alpaca_plus_7b/bert4torch_config.json CHANGED
@@ -9,5 +9,6 @@
   "vocab_size": 49954,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
chinese_llama_plus_7b/bert4torch_config.json CHANGED
@@ -9,5 +9,6 @@
   "vocab_size": 49953,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
deepseek-moe-16b-base/bert4torch_config.json CHANGED
@@ -31,5 +31,6 @@
   "vocab_size": 102400,
   "skip_init": true,
   "segment_vocab_size": 0,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"add_special_tokens": false, "skip_special_tokens": true}, "max_length": 4096, "end_id": 100001}
 }
falcon-7b-instruct/bert4torch_config.json CHANGED
@@ -11,7 +11,7 @@
   "hidden_dropout": 0.0,
   "hidden_size": 4544,
   "initializer_range": 0.02,
-  "intermediate_size": 8192,
+  "intermediate_size": 18176,
   "hidden_act": "gelu",
   "layer_norm_eps": 1e-05,
   "multi_query": true,
falcon-7b/bert4torch_config.json CHANGED
@@ -11,7 +11,7 @@
   "hidden_dropout": 0.0,
   "hidden_size": 4544,
   "initializer_range": 0.02,
-  "intermediate_size": 8192,
+  "intermediate_size": 18176,
   "hidden_act": "gelu",
   "layer_norm_eps": 1e-05,
   "multi_query": true,
internlm-7b/bert4torch_config.json CHANGED
@@ -12,5 +12,6 @@
   "skip_init": true,
   "rope_rank": "updown",
   "torch_dtype": "float16",
-  "tie_word_embeddings": false
+  "tie_word_embeddings": false,
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048}
 }
internlm-chat-7b/bert4torch_config.json CHANGED
@@ -13,5 +13,5 @@
   "rope_rank": "updown",
   "torch_dtype": "float16",
   "tie_word_embeddings": false,
-  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [2, 103028]}
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [2, 103028], "max_length": 2048}
 }
llama-13b/bert4torch_config.json CHANGED
@@ -11,5 +11,6 @@
   "vocab_size": 32000,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
llama-7b/bert4torch_config.json CHANGED
@@ -11,5 +11,6 @@
   "vocab_size": 32000,
   "segment_vocab_size": 0,
   "skip_init": true,
-  "rope_rank": "updown"
+  "rope_rank": "updown",
+  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }