Tongjilibo committed
Commit 86a6956 · Parent(s): 64adcff
Add convert_lm_logits_dtype
- Llama-2-13b-chat-hf/bert4torch_config.json +1 -0
- Llama-2-13b-hf/bert4torch_config.json +1 -0
- Llama-2-7b-chat-hf/bert4torch_config.json +1 -0
- Llama-2-7b-hf/bert4torch_config.json +1 -0
- Meta-Llama-3-8B-Instruct/bert4torch_config.json +1 -0
- Meta-Llama-3-8B/bert4torch_config.json +1 -0
- Qwen1.5-0.5B-Chat/bert4torch_config.json +1 -0
- Qwen1.5-0.5B/bert4torch_config.json +1 -0
- Qwen1.5-1.8B-Chat/bert4torch_config.json +1 -0
- Qwen1.5-1.8B/bert4torch_config.json +1 -0
- Qwen1.5-14B-Chat/bert4torch_config.json +1 -0
- Qwen1.5-14B/bert4torch_config.json +1 -0
- Qwen1.5-7B-Chat/bert4torch_config.json +1 -0
- Qwen1.5-7B/bert4torch_config.json +1 -0
- Qwen2-0.5B-Instruct/bert4torch_config.json +1 -0
- Qwen2-0.5B/bert4torch_config.json +1 -0
- Qwen2-1.5B-Instruct/bert4torch_config.json +1 -0
- Qwen2-1.5B/bert4torch_config.json +1 -0
- Qwen2-7B-Instruct/bert4torch_config.json +1 -0
- Qwen2-7B/bert4torch_config.json +1 -0
- llama-13b/bert4torch_config.json +1 -0
- llama-7b/bert4torch_config.json +1 -0
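The added "convert_lm_logits_dtype": "float32" entry presumably tells bert4torch to cast the LM-head logits back to full precision while the model itself keeps running in float16/bfloat16, avoiding overflow and precision loss when the logits go through softmax/sampling at generation time. A minimal PyTorch sketch of that idea follows; the function cast_lm_logits and where it is called are illustrative assumptions, not the actual bert4torch implementation.

import torch

def cast_lm_logits(lm_logits: torch.Tensor, dtype_name: str = "float32") -> torch.Tensor:
    # Cast half-precision LM-head logits to the dtype named in the config
    # (e.g. "float32") before softmax/sampling; no-op if already that dtype.
    target = getattr(torch, dtype_name)
    return lm_logits if lm_logits.dtype == target else lm_logits.to(target)

# Example: logits produced by a model running in float16
logits_fp16 = torch.randn(1, 8, 32000, dtype=torch.float16)
logits_fp32 = cast_lm_logits(logits_fp16, "float32")
print(logits_fp32.dtype)  # torch.float32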
Llama-2-13b-chat-hf/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
 "rope_rank": "updown",
 "tie_word_embeddings": false,
 "torch_dtype": "float16",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
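Every per-model diff below is the same one-line addition. For reference, a config like the one above is consumed when the model is built; the sketch below assumes bert4torch's build_transformer_model entry point and uses placeholder local paths, so the exact arguments should be checked against the bert4torch documentation.

from bert4torch.models import build_transformer_model

# Placeholder paths: point these at a locally downloaded checkpoint and the
# bert4torch_config.json that now contains "convert_lm_logits_dtype".
model = build_transformer_model(
    config_path='Llama-2-13b-chat-hf/bert4torch_config.json',
    checkpoint_path='Llama-2-13b-chat-hf/pytorch_model.bin',
)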
Llama-2-13b-hf/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
 "rope_rank": "updown",
 "tie_word_embeddings": false,
 "torch_dtype": "float16",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Llama-2-7b-chat-hf/bert4torch_config.json
CHANGED
@@ -10,5 +10,6 @@
 "skip_init": true,
 "layer_norm_eps": 1e-6,
 "rope_rank": "updown",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Llama-2-7b-hf/bert4torch_config.json
CHANGED
@@ -10,5 +10,6 @@
 "skip_init": true,
 "layer_norm_eps": 1e-5,
 "rope_rank": "updown",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false},"end_id": 2}
 }
Meta-Llama-3-8B-Instruct/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
 "torch_dtype": "bfloat16",
 "tie_word_embeddings": false,
 "attention_probs_dropout_prob": 0.0,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
Meta-Llama-3-8B/bert4torch_config.json
CHANGED
@@ -16,5 +16,6 @@
 "torch_dtype": "bfloat16",
 "tie_word_embeddings": false,
 "attention_probs_dropout_prob": 0.0,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true, "add_special_tokens": false}, "end_id": [128001, 128009]}
 }
Qwen1.5-0.5B-Chat/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 21,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen1.5-0.5B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 21,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen1.5-1.8B-Chat/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 21,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen1.5-1.8B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 21,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen1.5-14B-Chat/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 35,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen1.5-14B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 35,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen1.5-7B-Chat/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen1.5-7B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen2-0.5B-Instruct/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 24,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen2-0.5B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 24,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen2-1.5B-Instruct/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen2-1.5B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 32768,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
Qwen2-7B-Instruct/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 131072,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151644, 151645],
 "max_length": 32768}
 }
Qwen2-7B/bert4torch_config.json
CHANGED
@@ -23,6 +23,7 @@
 "max_position_embeddings": 32768,
 "sliding_window": 131072,
 "max_window_layers": 28,
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "end_id": [151643],
 "max_length": 32768}
 }
llama-13b/bert4torch_config.json
CHANGED
@@ -12,5 +12,6 @@
 "segment_vocab_size": 0,
 "skip_init": true,
 "rope_rank": "updown",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }
llama-7b/bert4torch_config.json
CHANGED
@@ -12,5 +12,6 @@
 "segment_vocab_size": 0,
 "skip_init": true,
 "rope_rank": "updown",
+"convert_lm_logits_dtype": "float32",
 "generation_config": {"tokenizer_config": {"skip_special_tokens": true}, "max_length": 2048, "end_id": 2}
 }