Update v1
Browse files
- README.md +3 -1
- config.json +2 -2
- generation_config.json +2 -2
- model.safetensors +1 -1
- special_tokens_map.json +7 -1
- tokenizer.json +2 -2
- tokenizer_config.json +1 -1
README.md
CHANGED
@@ -17,12 +17,14 @@ tags:
|
|
17 |
|
18 |
# DeepSeek-R1-DRAFT-Qwen2.5-Coder-0.5B
|
19 |
|
|
|
|
|
20 |
This model is trained on CODE outputs of <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B">deepseek-ai/DeepSeek-R1-Distill-Qwen-32B</a> and is meant to be used only as a draft model for speculative decoding.
|
21 |
|
22 |
It's specifically intended for users of 3090/4090, allowing you to run the DeepSeek-R1-Distill-Qwen-32B-Q4_K_M GGUF version with 16k context and speeding up generation without sacrificing more context length or model quality.
|
23 |
|
24 |
# Data info
|
25 |
|
26 |
-
The data consists of code tasks collected from various datasets. It has been trained for
|
27 |
|
28 |
Since data generation was done using spare GPU time, I may publish a further trained version later.
|
|
|
17 |
|
18 |
# DeepSeek-R1-DRAFT-Qwen2.5-Coder-0.5B
|
19 |
|
20 |
+
**Updated**
|
21 |
+
|
22 |
This model is trained on CODE outputs of <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B">deepseek-ai/DeepSeek-R1-Distill-Qwen-32B</a> and is meant to be used only as a draft model for speculative decoding.
|
23 |
|
24 |
It's specifically intended for users of 3090/4090, allowing you to run the DeepSeek-R1-Distill-Qwen-32B-Q4_K_M GGUF version with 16k context and speeding up generation without sacrificing more context length or model quality.
|
25 |
|
26 |
# Data info
|
27 |
|
28 |
+
The data consists of code tasks collected from various datasets. The model has been trained for 2 epochs on 2.5k unique examples, totaling 7.6 million tokens per epoch.
|
29 |
|
30 |
Since data generation was done using spare GPU time, I may publish a further trained version later.
|
config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
6 |
"attention_dropout": 0.0,
|
7 |
-
"bos_token_id":
|
8 |
"eos_token_id": 151643,
|
9 |
"hidden_act": "silu",
|
10 |
"hidden_size": 896,
|
@@ -23,7 +23,7 @@
|
|
23 |
"sliding_window": null,
|
24 |
"tie_word_embeddings": true,
|
25 |
"torch_dtype": "bfloat16",
|
26 |
-
"transformers_version": "4.48.
|
27 |
"use_cache": false,
|
28 |
"use_sliding_window": false,
|
29 |
"vocab_size": 152064
|
|
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
6 |
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151646,
|
8 |
"eos_token_id": 151643,
|
9 |
"hidden_act": "silu",
|
10 |
"hidden_size": 896,
|
|
|
23 |
"sliding_window": null,
|
24 |
"tie_word_embeddings": true,
|
25 |
"torch_dtype": "bfloat16",
|
26 |
+
"transformers_version": "4.48.3",
|
27 |
"use_cache": false,
|
28 |
"use_sliding_window": false,
|
29 |
"vocab_size": 152064
|
generation_config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"bos_token_id":
|
3 |
"eos_token_id": 151643,
|
4 |
"max_new_tokens": 2048,
|
5 |
-
"transformers_version": "4.48.
|
6 |
}
|
|
|
1 |
{
|
2 |
+
"bos_token_id": 151646,
|
3 |
"eos_token_id": 151643,
|
4 |
"max_new_tokens": 2048,
|
5 |
+
"transformers_version": "4.48.3"
|
6 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988327200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5319821290e9f093fbb7700acf6457abb2b7b007d70baa102a6b77c6fc2fa0a6
|
3 |
size 988327200
|
special_tokens_map.json
CHANGED
@@ -1,5 +1,11 @@
|
|
1 |
{
|
2 |
-
"bos_token":
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"eos_token": {
|
4 |
"content": "<|end▁of▁sentence|>",
|
5 |
"lstrip": false,
|
|
|
1 |
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|begin▁of▁sentence|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
"eos_token": {
|
10 |
"content": "<|end▁of▁sentence|>",
|
11 |
"lstrip": false,
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
|
3 |
+
size 11422778
|
tokenizer_config.json
CHANGED
@@ -181,7 +181,7 @@
|
|
181 |
}
|
182 |
},
|
183 |
"bos_token": "<|begin▁of▁sentence|>",
|
184 |
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant
|
185 |
"clean_up_tokenization_spaces": false,
|
186 |
"eos_token": "<|end▁of▁sentence|>",
|
187 |
"extra_special_tokens": {},
|
|
|
181 |
}
|
182 |
},
|
183 |
"bos_token": "<|begin▁of▁sentence|>",
|
184 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
|
185 |
"clean_up_tokenization_spaces": false,
|
186 |
"eos_token": "<|end▁of▁sentence|>",
|
187 |
"extra_special_tokens": {},
|