perlthoughts committed on
Commit
b669239
1 Parent(s): 64b3de2

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +5 -1
  2. tokenizer.json +36 -0
  3. tokenizer_config.json +32 -0
added_tokens.json CHANGED
@@ -1,7 +1,11 @@
1
  {
2
  "<|assistant|>": 32003,
3
  "<|end_of_turn|>": 32000,
 
 
4
  "<|pad_0|>": 32001,
 
5
  "<|system|>": 32004,
6
- "<|user|>": 32002
 
7
  }
 
1
  {
2
  "<|assistant|>": 32003,
3
  "<|end_of_turn|>": 32000,
4
+ "<|im_end|>": 32006,
5
+ "<|im_start|>": 32007,
6
  "<|pad_0|>": 32001,
7
+ "<|prompter|>": 32005,
8
  "<|system|>": 32004,
9
+ "<|user|>": 32002,
10
+ "[PAD]": 32008
11
  }
tokenizer.json CHANGED
@@ -74,6 +74,42 @@
74
  "rstrip": false,
75
  "normalized": false,
76
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
78
  ],
79
  "normalizer": {
 
74
  "rstrip": false,
75
  "normalized": false,
76
  "special": true
77
+ },
78
+ {
79
+ "id": 32005,
80
+ "content": "<|prompter|>",
81
+ "single_word": false,
82
+ "lstrip": false,
83
+ "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
+ },
87
+ {
88
+ "id": 32006,
89
+ "content": "<|im_end|>",
90
+ "single_word": false,
91
+ "lstrip": false,
92
+ "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
+ },
96
+ {
97
+ "id": 32007,
98
+ "content": "<|im_start|>",
99
+ "single_word": false,
100
+ "lstrip": false,
101
+ "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
+ },
105
+ {
106
+ "id": 32008,
107
+ "content": "[PAD]",
108
+ "single_word": false,
109
+ "lstrip": false,
110
+ "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
  }
114
  ],
115
  "normalizer": {
tokenizer_config.json CHANGED
@@ -63,6 +63,38 @@
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "additional_special_tokens": [
 
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<|prompter|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<|im_end|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<|im_start|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "[PAD]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
  }
99
  },
100
  "additional_special_tokens": [