Xenova
/

nucleotide-transformer-500m-human-ref

Feature Extraction

Transformers.js

Model card Files Files and versions Community

Xenova HF staff committed on Dec 10, 2023

Commit

ede0e9a

•

1 Parent(s): 098eceb

Upload tokenizer.json

Files changed (1) hide show

tokenizer.json +57 -6

tokenizer.json CHANGED Viewed

@@ -2,7 +2,62 @@
   "version": "1.0",
   "truncation": null,
   "padding": null,
-  "added_tokens": [],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "BertPreTokenizer"
@@ -49,11 +104,7 @@
       }
     }
   },
-  "decoder": {
-    "type": "WordPiece",
-    "prefix": "",
-    "cleanup": true
-  },
   "model": {
     "type": "WordPiece",
     "unk_token": "<unk>",

   "version": "1.0",
   "truncation": null,
   "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "<unk>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "<pad>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "<mask>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "<cls>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4105,
+      "content": "<eos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4106,
+      "content": "<bos>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
   "normalizer": null,
   "pre_tokenizer": {
     "type": "BertPreTokenizer"
       }
     }
   },
+  "decoder": null,
   "model": {
     "type": "WordPiece",
     "unk_token": "<unk>",