jonatanklosko committed
Commit bbfbe36
1 Parent(s): 1bd0bc7

Upload 2 files


Same as [deepseek-coder-1.3b-base#1](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base/discussions/1) and [deepseek-coder-1.3b-base#2](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base/discussions/2).

Files changed (2)
  1. special_tokens_map.json +23 -0
  2. tokenizer.json +52 -4
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<|begin▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
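
With `special_tokens_map.json` in place, the named special tokens resolve exactly as declared above. A minimal sketch, assuming the `transformers` library; the repo id below is the base model from the linked discussions, not necessarily this repository:

```python
# A minimal sketch, assuming the `transformers` library; the repo id is the
# base model from the linked discussions, used here for illustration only.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-base")

print(tokenizer.bos_token)  # <|begin▁of▁sentence|>
print(tokenizer.eos_token)  # <|end▁of▁sentence|>
print(tokenizer.pad_token)  # <|end▁of▁sentence|> (pad is aliased to eos)
```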
tokenizer.json CHANGED
@@ -254,10 +254,58 @@
     ]
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": false,
-    "use_regex": true
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<|begin▁of▁sentence|>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "<|begin▁of▁sentence|>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "<|begin▁of▁sentence|>",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<|begin▁of▁sentence|>": {
+        "id": "<|begin▁of▁sentence|>",
+        "ids": [
+          32013
+        ],
+        "tokens": [
+          "<|begin▁of▁sentence|>"
+        ]
+      }
+    }
   },
   "decoder": {
     "type": "ByteLevel",