Tongjilibo commited on
Commit
86a6956
1 Parent(s): 64adcff

增加convert_lm_logits_dtype

Browse files
Llama-2-13b-chat-hf/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
16
  "rope_rank": "updown",
17
  "tie_word_embeddings": false,
18
  "torch_dtype": "float16",
 
19
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
20
  }
 
16
  "rope_rank": "updown",
17
  "tie_word_embeddings": false,
18
  "torch_dtype": "float16",
19
+ "convert_lm_logits_dtype": "float32",
20
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
21
  }
Llama-2-13b-hf/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
16
  "rope_rank": "updown",
17
  "tie_word_embeddings": false,
18
  "torch_dtype": "float16",
 
19
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
20
  }
 
16
  "rope_rank": "updown",
17
  "tie_word_embeddings": false,
18
  "torch_dtype": "float16",
19
+ "convert_lm_logits_dtype": "float32",
20
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
21
  }
Llama-2-7b-chat-hf/bert4torch_config.json CHANGED
@@ -10,5 +10,6 @@
10
  "skip_init": true,
11
  "layer_norm_eps": 1e-6,
12
  "rope_rank": "updown",
 
13
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
14
  }
 
10
  "skip_init": true,
11
  "layer_norm_eps": 1e-6,
12
  "rope_rank": "updown",
13
+ "convert_lm_logits_dtype": "float32",
14
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
15
  }
Llama-2-7b-hf/bert4torch_config.json CHANGED
@@ -10,5 +10,6 @@
10
  "skip_init": true,
11
  "layer_norm_eps": 1e-5,
12
  "rope_rank": "updown",
 
13
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
14
  }
 
10
  "skip_init": true,
11
  "layer_norm_eps": 1e-5,
12
  "rope_rank": "updown",
13
+ "convert_lm_logits_dtype": "float32",
14
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
15
  }
Meta-Llama-3-8B-Instruct/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
16
  "torch_dtype": "bfloat16",
17
  "tie_word_embeddings": false,
18
  "attention_probs_dropout_prob": 0.0,
 
19
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
20
  }
 
16
  "torch_dtype": "bfloat16",
17
  "tie_word_embeddings": false,
18
  "attention_probs_dropout_prob": 0.0,
19
+ "convert_lm_logits_dtype": "float32",
20
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
21
  }
Meta-Llama-3-8B/bert4torch_config.json CHANGED
@@ -16,5 +16,6 @@
16
  "torch_dtype": "bfloat16",
17
  "tie_word_embeddings": false,
18
  "attention_probs_dropout_prob": 0.0,
 
19
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
20
  }
 
16
  "torch_dtype": "bfloat16",
17
  "tie_word_embeddings": false,
18
  "attention_probs_dropout_prob": 0.0,
19
+ "convert_lm_logits_dtype": "float32",
20
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
21
  }
Qwen1.5-0.5B-Chat/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen1.5-0.5B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen1.5-1.8B-Chat/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen1.5-1.8B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 21,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen1.5-14B-Chat/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 35,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 35,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen1.5-14B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 35,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 35,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen1.5-7B-Chat/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen1.5-7B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen2-0.5B-Instruct/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 24,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 24,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen2-0.5B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 24,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 24,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen2-1.5B-Instruct/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen2-1.5B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 32768,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
Qwen2-7B-Instruct/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 131072,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 131072,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
28
  "max_length": 32768}
29
  }
Qwen2-7B/bert4torch_config.json CHANGED
@@ -23,6 +23,7 @@
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 131072,
25
  "max_window_layers": 28,
 
26
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
27
  "max_length": 32768}
28
  }
 
23
  "max_position_embeddings": 32768,
24
  "sliding_window": 131072,
25
  "max_window_layers": 28,
26
+ "convert_lm_logits_dtype": "float32",
27
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
28
  "max_length": 32768}
29
  }
llama-13b/bert4torch_config.json CHANGED
@@ -12,5 +12,6 @@
12
  "segment_vocab_size": 0,
13
  "skip_init": true,
14
  "rope_rank": "updown",
 
15
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
16
  }
 
12
  "segment_vocab_size": 0,
13
  "skip_init": true,
14
  "rope_rank": "updown",
15
+ "convert_lm_logits_dtype": "float32",
16
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
17
  }
llama-7b/bert4torch_config.json CHANGED
@@ -12,5 +12,6 @@
12
  "segment_vocab_size": 0,
13
  "skip_init": true,
14
  "rope_rank": "updown",
 
15
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
16
  }
 
12
  "segment_vocab_size": 0,
13
  "skip_init": true,
14
  "rope_rank": "updown",
15
+ "convert_lm_logits_dtype": "float32",
16
  "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
17
  }