Upload 2 files

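This applies the same change as [deepseek-coder-1.3b-base#1](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base/discussions/1) and [deepseek-coder-1.3b-base#2](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base/discussions/2): it adds `special_tokens_map.json` and replaces the `ByteLevel` post-processor in `tokenizer.json` with a `TemplateProcessing` post-processor that prepends `<｜begin▁of▁sentence｜>` to each sequence.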

Files changed (2)

special_tokens_map.json +23 -0
tokenizer.json +52 -4

special_tokens_map.json ADDED Viewed

@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<｜begin▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<｜end▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<｜end▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json CHANGED Viewed

@@ -254,10 +254,58 @@
     ]
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": false,
-    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",

     ]
   },
   "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<｜begin▁of▁sentence｜>": {
+        "id": "<｜begin▁of▁sentence｜>",
+        "ids": [
+          32013
+        ],
+        "tokens": [
+          "<｜begin▁of▁sentence｜>"
+        ]
+      }
+    }
   },
   "decoder": {
     "type": "ByteLevel",