alamios commited on
Commit
4235257
·
verified ·
1 Parent(s): bb7b152
README.md CHANGED
@@ -17,12 +17,14 @@ tags:
17
 
18
  # DeepSeek-R1-DRAFT-Qwen2.5-Coder-0.5B
19
 
 
 
20
  This model is trained on CODE outputs of <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B">deepseek-ai/DeepSeek-R1-Distill-Qwen-32B</a> and is meant to be used only as draft model for speculative decoding.
21
 
22
  It's specifically intended for users of 3090/4090, allowing you to run the DeepSeek-R1-Distill-Qwen-32B-Q4_K_M GGUF version with 16k context and speeding up generation without sacrificing more context length or model quality.
23
 
24
  # Data info
25
 
26
- The data consists of code tasks collected from various datasets. It has been trained for 4 epochs on 1400 unique examples, for a total of 4,600,000 tokens per epoch.
27
 
28
  Since data generation was done using spare GPU time, I may publish a further trained version later.
 
17
 
18
  # DeepSeek-R1-DRAFT-Qwen2.5-Coder-0.5B
19
 
20
+ **Updated**
21
+
22
  This model is trained on CODE outputs of <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B">deepseek-ai/DeepSeek-R1-Distill-Qwen-32B</a> and is meant to be used only as draft model for speculative decoding.
23
 
24
  It's specifically intended for users of 3090/4090, allowing you to run the DeepSeek-R1-Distill-Qwen-32B-Q4_K_M GGUF version with 16k context and speeding up generation without sacrificing more context length or model quality.
25
 
26
  # Data info
27
 
28
+ The data consists of code tasks collected from various datasets. It has been trained for 2 epochs on 2.5k unique examples, for a total of 7.6 million tokens per epoch.
29
 
30
  Since data generation was done using spare GPU time, I may publish a further trained version later.
config.json CHANGED
@@ -4,7 +4,7 @@
4
  "Qwen2ForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
  "eos_token_id": 151643,
9
  "hidden_act": "silu",
10
  "hidden_size": 896,
@@ -23,7 +23,7 @@
23
  "sliding_window": null,
24
  "tie_word_embeddings": true,
25
  "torch_dtype": "bfloat16",
26
- "transformers_version": "4.48.1",
27
  "use_cache": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 152064
 
4
  "Qwen2ForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 151646,
8
  "eos_token_id": 151643,
9
  "hidden_act": "silu",
10
  "hidden_size": 896,
 
23
  "sliding_window": null,
24
  "tie_word_embeddings": true,
25
  "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.48.3",
27
  "use_cache": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 152064
generation_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "bos_token_id": 151643,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
- "transformers_version": "4.48.1"
6
  }
 
1
  {
2
+ "bos_token_id": 151646,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
+ "transformers_version": "4.48.3"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9880be6dd60583e69c3f5c668a423a22b04f9ab4371564995cc66070473a2cce
3
  size 988327200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5319821290e9f093fbb7700acf6457abb2b7b007d70baa102a6b77c6fc2fa0a6
3
  size 988327200
special_tokens_map.json CHANGED
@@ -1,5 +1,11 @@
1
  {
2
- "bos_token": "<|begin▁of▁sentence|>",
 
 
 
 
 
 
3
  "eos_token": {
4
  "content": "<|end▁of▁sentence|>",
5
  "lstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|begin▁of▁sentence|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "eos_token": {
10
  "content": "<|end▁of▁sentence|>",
11
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15d66ece9aa2f1d1f19a8abf81ed1dcaa3af84ea2e75f60841b148ca61574dee
3
- size 11422878
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
3
+ size 11422778
tokenizer_config.json CHANGED
@@ -181,7 +181,7 @@
181
  }
182
  },
183
  "bos_token": "<|begin▁of▁sentence|>",
184
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
185
  "clean_up_tokenization_spaces": false,
186
  "eos_token": "<|end▁of▁sentence|>",
187
  "extra_special_tokens": {},
 
181
  }
182
  },
183
  "bos_token": "<|begin▁of▁sentence|>",
184
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
185
  "clean_up_tokenization_spaces": false,
186
  "eos_token": "<|end▁of▁sentence|>",
187
  "extra_special_tokens": {},