Commit c294519 by Locutusque
Parent: 4bee175

Upload 7 files

added_tokens.json ADDED
@@ -0,0 +1,5 @@
+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}
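The new file pins the three ChatML control tokens to fixed vocabulary ids. A minimal sketch of inspecting that mapping, assuming a local checkout of this repository (the file path is relative to the repo root):

import json

# Load the token-to-id map added in this commit.
with open("added_tokens.json") as f:
    added_tokens = json.load(f)

# Print the ChatML control tokens in id order.
for token, token_id in sorted(added_tokens.items(), key=lambda kv: kv[1]):
    print(f"{token!r} -> {token_id}")
# Expected:
# '<|endoftext|>' -> 151643
# '<|im_start|>' -> 151644
# '<|im_end|>' -> 151645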
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d062420bf00c23d5e5c7f687d4cd11da87c654f31f67edc5670844637697c46f
+oid sha256:8db636e74a6180f720e0b817c5e0c9a3b6b1bfe3eaf4995d833ae768a3cc74ec
 size 3673678216
special_tokens_map.json CHANGED
@@ -1,3 +1,16 @@
 {
-  "pad_token": "<|extra_0|>"
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
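With this change both the padding and end-of-sequence tokens resolve to <|endoftext|>, replacing the old Qwen-style "<|extra_0|>" pad token. A minimal check, assuming a local checkout of the repository at this commit (the "./" path is a placeholder):

from transformers import AutoTokenizer

# Placeholder path: a local checkout of this repo at commit c294519.
tokenizer = AutoTokenizer.from_pretrained("./")

print(tokenizer.pad_token)  # <|endoftext|>
print(tokenizer.eos_token)  # <|endoftext|>
print(tokenizer.pad_token_id == tokenizer.eos_token_id)  # True (151643)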
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,14 +1,40 @@
 {
-  "added_tokens_decoder": {},
-  "auto_map": {
-    "AutoTokenizer": [
-      "Qwen/Qwen-1_8B--tokenization_qwen.QWenTokenizer",
-      null
-    ]
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
-  "clean_up_tokenization_spaces": true,
-  "model_max_length": 8192,
-  "pad_token": "<|extra_0|>",
-  "padding_side": "left",
-  "tokenizer_class": "QWenTokenizer"
+  "additional_special_tokens": [],
+  "bos_token": "<|endoftext|>",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 32768,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "unk_token": null
 }
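The updated config switches the tokenizer class from the remote-code QWenTokenizer to PreTrainedTokenizerFast, raises model_max_length to 32768, and adds a ChatML chat_template. A minimal sketch of rendering a prompt with that template, again assuming a local checkout of the repository (the "./" path is a placeholder):

from transformers import AutoTokenizer

# Placeholder path: a local checkout of this repo at commit c294519.
tokenizer = AutoTokenizer.from_pretrained("./")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# apply_chat_template renders the ChatML template added in tokenizer_config.json.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant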