nz committed on
Commit
e82c72c
1 Parent(s): edd707d

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +254 -254
tokenizer.json CHANGED
@@ -47,260 +47,260 @@
47
  "\u0006": 7,
48
  "\u0007": 8,
49
  "\b": 9,
50
- "\u000e": 10,
51
- "\u000f": 11,
52
- "\u0010": 12,
53
- "\u0011": 13,
54
- "\u0012": 14,
55
- "\u0013": 15,
56
- "\u0014": 16,
57
- "\u0015": 17,
58
- "\u0016": 18,
59
- "\u0017": 19,
60
- "\u0018": 20,
61
- "\u0019": 21,
62
- "\u001a": 22,
63
- "\u001b": 23,
64
- "!": 24,
65
- "\"": 25,
66
- "#": 26,
67
- "$": 27,
68
- "%": 28,
69
- "&": 29,
70
- "'": 30,
71
- "(": 31,
72
- ")": 32,
73
- "*": 33,
74
- "+": 34,
75
- ",": 35,
76
- "-": 36,
77
- ".": 37,
78
- "/": 38,
79
- "0": 39,
80
- "1": 40,
81
- "2": 41,
82
- "3": 42,
83
- "4": 43,
84
- "5": 44,
85
- "6": 45,
86
- "7": 46,
87
- "8": 47,
88
- "9": 48,
89
- ":": 49,
90
- ";": 50,
91
- "<": 51,
92
- "=": 52,
93
- ">": 53,
94
- "?": 54,
95
- "@": 55,
96
- "A": 56,
97
- "B": 57,
98
- "C": 58,
99
- "D": 59,
100
- "E": 60,
101
- "F": 61,
102
- "G": 62,
103
- "H": 63,
104
- "I": 64,
105
- "J": 65,
106
- "K": 66,
107
- "L": 67,
108
- "M": 68,
109
- "N": 69,
110
- "O": 70,
111
- "P": 71,
112
- "Q": 72,
113
- "R": 73,
114
- "S": 74,
115
- "T": 75,
116
- "U": 76,
117
- "V": 77,
118
- "W": 78,
119
- "X": 79,
120
- "Y": 80,
121
- "Z": 81,
122
- "[": 82,
123
- "\\": 83,
124
- "]": 84,
125
- "^": 85,
126
- "_": 86,
127
- "`": 87,
128
- "a": 88,
129
- "b": 89,
130
- "c": 90,
131
- "d": 91,
132
- "e": 92,
133
- "f": 93,
134
- "g": 94,
135
- "h": 95,
136
- "i": 96,
137
- "j": 97,
138
- "k": 98,
139
- "l": 99,
140
- "m": 100,
141
- "n": 101,
142
- "o": 102,
143
- "p": 103,
144
- "q": 104,
145
- "r": 105,
146
- "s": 106,
147
- "t": 107,
148
- "u": 108,
149
- "v": 109,
150
- "w": 110,
151
- "x": 111,
152
- "y": 112,
153
- "z": 113,
154
- "{": 114,
155
- "|": 115,
156
- "}": 116,
157
- "~": 117,
158
- "": 118,
159
- "€": 119,
160
- "": 120,
161
- "‚": 121,
162
- "ƒ": 122,
163
- "„": 123,
164
- "†": 124,
165
- "‡": 125,
166
- "ˆ": 126,
167
- "‰": 127,
168
- "Š": 128,
169
- "‹": 129,
170
- "Œ": 130,
171
- "": 131,
172
- "Ž": 132,
173
- "": 133,
174
- "": 134,
175
- "‘": 135,
176
- "’": 136,
177
- "“": 137,
178
- "”": 138,
179
- "•": 139,
180
- "–": 140,
181
- "—": 141,
182
- "˜": 142,
183
- "™": 143,
184
- "š": 144,
185
- "›": 145,
186
- "œ": 146,
187
- "": 147,
188
- "ž": 148,
189
- "Ÿ": 149,
190
- "¡": 150,
191
- "¢": 151,
192
- "£": 152,
193
- "¤": 153,
194
- "¥": 154,
195
- "¦": 155,
196
- "§": 156,
197
- "¨": 157,
198
- "©": 158,
199
- "ª": 159,
200
- "«": 160,
201
- "¬": 161,
202
- "­": 162,
203
- "®": 163,
204
- "¯": 164,
205
- "°": 165,
206
- "±": 166,
207
- "²": 167,
208
- "³": 168,
209
- "´": 169,
210
- "µ": 170,
211
- "¶": 171,
212
- "·": 172,
213
- "¸": 173,
214
- "¹": 174,
215
- "º": 175,
216
- "»": 176,
217
- "¼": 177,
218
- "½": 178,
219
- "¾": 179,
220
- "¿": 180,
221
- "À": 181,
222
- "Á": 182,
223
- "Â": 183,
224
- "Ã": 184,
225
- "Ä": 185,
226
- "Å": 186,
227
- "Æ": 187,
228
- "Ç": 188,
229
- "È": 189,
230
- "É": 190,
231
- "Ê": 191,
232
- "Ë": 192,
233
- "Ì": 193,
234
- "Í": 194,
235
- "Î": 195,
236
- "Ï": 196,
237
- "Ð": 197,
238
- "Ñ": 198,
239
- "Ò": 199,
240
- "Ó": 200,
241
- "Ô": 201,
242
- "Õ": 202,
243
- "Ö": 203,
244
- "×": 204,
245
- "Ø": 205,
246
- "Ù": 206,
247
- "Ú": 207,
248
- "Û": 208,
249
- "Ü": 209,
250
- "Ý": 210,
251
- "Þ": 211,
252
- "ß": 212,
253
- "à": 213,
254
- "á": 214,
255
- "â": 215,
256
- "ã": 216,
257
- "ä": 217,
258
- "å": 218,
259
- "æ": 219,
260
- "ç": 220,
261
- "è": 221,
262
- "é": 222,
263
- "ê": 223,
264
- "ë": 224,
265
- "ì": 225,
266
- "í": 226,
267
- "î": 227,
268
- "ï": 228,
269
- "ð": 229,
270
- "ñ": 230,
271
- "ò": 231,
272
- "ó": 232,
273
- "ô": 233,
274
- "õ": 234,
275
- "ö": 235,
276
- "÷": 236,
277
- "ø": 237,
278
- "ù": 238,
279
- "ú": 239,
280
- "û": 240,
281
- "ü": 241,
282
- "ý": 242,
283
- "þ": 243,
284
- "ÿ": 244,
285
- "Ā": 245,
286
- "ā": 246,
287
- "Ă": 247,
288
- "ă": 248,
289
- "Ą": 249,
290
- "ą": 250,
291
- "Ć": 251,
292
- "ć": 252,
293
- "Ĉ": 253,
294
- "ĉ": 254,
295
- "Ċ": 255,
296
- "ċ": 256,
297
- "Č": 257,
298
- "č": 258,
299
- "Ď": 259,
300
- "ď": 260,
301
- "Đ": 261,
302
- "đ": 262,
303
- "Ē": 263
304
  },
305
  "merges": []
306
  }
 
47
  "\u0006": 7,
48
  "\u0007": 8,
49
  "\b": 9,
50
+ "\t": 10,
51
+ "\n": 11,
52
+ "\u000b": 12,
53
+ "\f": 13,
54
+ "\r": 14,
55
+ "\u000e": 15,
56
+ "\u000f": 16,
57
+ "\u0010": 17,
58
+ "\u0011": 18,
59
+ "\u0012": 19,
60
+ "\u0013": 20,
61
+ "\u0014": 21,
62
+ "\u0015": 22,
63
+ "\u0016": 23,
64
+ "\u0017": 24,
65
+ "\u0018": 25,
66
+ "\u0019": 26,
67
+ "\u001a": 27,
68
+ "\u001b": 28,
69
+ "\u001c": 29,
70
+ "\u001d": 30,
71
+ "\u001e": 31,
72
+ "\u001f": 32,
73
+ " ": 33,
74
+ "!": 34,
75
+ "\"": 35,
76
+ "#": 36,
77
+ "$": 37,
78
+ "%": 38,
79
+ "&": 39,
80
+ "'": 40,
81
+ "(": 41,
82
+ ")": 42,
83
+ "*": 43,
84
+ "+": 44,
85
+ ",": 45,
86
+ "-": 46,
87
+ ".": 47,
88
+ "/": 48,
89
+ "0": 49,
90
+ "1": 50,
91
+ "2": 51,
92
+ "3": 52,
93
+ "4": 53,
94
+ "5": 54,
95
+ "6": 55,
96
+ "7": 56,
97
+ "8": 57,
98
+ "9": 58,
99
+ ":": 59,
100
+ ";": 60,
101
+ "<": 61,
102
+ "=": 62,
103
+ ">": 63,
104
+ "?": 64,
105
+ "@": 65,
106
+ "A": 66,
107
+ "B": 67,
108
+ "C": 68,
109
+ "D": 69,
110
+ "E": 70,
111
+ "F": 71,
112
+ "G": 72,
113
+ "H": 73,
114
+ "I": 74,
115
+ "J": 75,
116
+ "K": 76,
117
+ "L": 77,
118
+ "M": 78,
119
+ "N": 79,
120
+ "O": 80,
121
+ "P": 81,
122
+ "Q": 82,
123
+ "R": 83,
124
+ "S": 84,
125
+ "T": 85,
126
+ "U": 86,
127
+ "V": 87,
128
+ "W": 88,
129
+ "X": 89,
130
+ "Y": 90,
131
+ "Z": 91,
132
+ "[": 92,
133
+ "\\": 93,
134
+ "]": 94,
135
+ "^": 95,
136
+ "_": 96,
137
+ "`": 97,
138
+ "a": 98,
139
+ "b": 99,
140
+ "c": 100,
141
+ "d": 101,
142
+ "e": 102,
143
+ "f": 103,
144
+ "g": 104,
145
+ "h": 105,
146
+ "i": 106,
147
+ "j": 107,
148
+ "k": 108,
149
+ "l": 109,
150
+ "m": 110,
151
+ "n": 111,
152
+ "o": 112,
153
+ "p": 113,
154
+ "q": 114,
155
+ "r": 115,
156
+ "s": 116,
157
+ "t": 117,
158
+ "u": 118,
159
+ "v": 119,
160
+ "w": 120,
161
+ "x": 121,
162
+ "y": 122,
163
+ "z": 123,
164
+ "{": 124,
165
+ "|": 125,
166
+ "}": 126,
167
+ "~": 127,
168
+ "": 128,
169
+ "€": 129,
170
+ "": 130,
171
+ "‚": 131,
172
+ "ƒ": 132,
173
+ "„": 133,
174
+ "…": 134,
175
+ "†": 135,
176
+ "‡": 136,
177
+ "ˆ": 137,
178
+ "‰": 138,
179
+ "Š": 139,
180
+ "‹": 140,
181
+ "Œ": 141,
182
+ "": 142,
183
+ "Ž": 143,
184
+ "": 144,
185
+ "": 145,
186
+ "‘": 146,
187
+ "’": 147,
188
+ "“": 148,
189
+ "”": 149,
190
+ "•": 150,
191
+ "–": 151,
192
+ "—": 152,
193
+ "˜": 153,
194
+ "™": 154,
195
+ "š": 155,
196
+ "›": 156,
197
+ "œ": 157,
198
+ "": 158,
199
+ "ž": 159,
200
+ "Ÿ": 160,
201
+ " ": 161,
202
+ "¡": 162,
203
+ "¢": 163,
204
+ "£": 164,
205
+ "¤": 165,
206
+ "¥": 166,
207
+ "¦": 167,
208
+ "§": 168,
209
+ "¨": 169,
210
+ "©": 170,
211
+ "ª": 171,
212
+ "«": 172,
213
+ "¬": 173,
214
+ "­": 174,
215
+ "®": 175,
216
+ "¯": 176,
217
+ "°": 177,
218
+ "±": 178,
219
+ "²": 179,
220
+ "³": 180,
221
+ "´": 181,
222
+ "µ": 182,
223
+ "¶": 183,
224
+ "·": 184,
225
+ "¸": 185,
226
+ "¹": 186,
227
+ "º": 187,
228
+ "»": 188,
229
+ "¼": 189,
230
+ "½": 190,
231
+ "¾": 191,
232
+ "¿": 192,
233
+ "À": 193,
234
+ "Á": 194,
235
+ "Â": 195,
236
+ "Ã": 196,
237
+ "Ä": 197,
238
+ "Å": 198,
239
+ "Æ": 199,
240
+ "Ç": 200,
241
+ "È": 201,
242
+ "É": 202,
243
+ "Ê": 203,
244
+ "Ë": 204,
245
+ "Ì": 205,
246
+ "Í": 206,
247
+ "Î": 207,
248
+ "Ï": 208,
249
+ "Ð": 209,
250
+ "Ñ": 210,
251
+ "Ò": 211,
252
+ "Ó": 212,
253
+ "Ô": 213,
254
+ "Õ": 214,
255
+ "Ö": 215,
256
+ "×": 216,
257
+ "Ø": 217,
258
+ "Ù": 218,
259
+ "Ú": 219,
260
+ "Û": 220,
261
+ "Ü": 221,
262
+ "Ý": 222,
263
+ "Þ": 223,
264
+ "ß": 224,
265
+ "à": 225,
266
+ "á": 226,
267
+ "â": 227,
268
+ "ã": 228,
269
+ "ä": 229,
270
+ "å": 230,
271
+ "æ": 231,
272
+ "ç": 232,
273
+ "è": 233,
274
+ "é": 234,
275
+ "ê": 235,
276
+ "ë": 236,
277
+ "ì": 237,
278
+ "í": 238,
279
+ "î": 239,
280
+ "ï": 240,
281
+ "ð": 241,
282
+ "ñ": 242,
283
+ "ò": 243,
284
+ "ó": 244,
285
+ "ô": 245,
286
+ "õ": 246,
287
+ "ö": 247,
288
+ "÷": 248,
289
+ "ø": 249,
290
+ "ù": 250,
291
+ "ú": 251,
292
+ "û": 252,
293
+ "ü": 253,
294
+ "ý": 254,
295
+ "þ": 255,
296
+ "ÿ": 256,
297
+ "Ā": 257,
298
+ "ā": 258,
299
+ "Ă": 259,
300
+ "ă": 260,
301
+ "Ą": 261,
302
+ "ą": 262,
303
+ "Ć": 263
304
  },
305
  "merges": []
306
  }