benjaminn committed
Commit 12bd715
1 Parent(s): fbd24af

Upload tokenizer

Files changed (4)
  1. special_tokens_map.json +7 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +86 -0
  4. vocab.txt +0 -0
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
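
For context, a minimal sketch of how the special tokens declared above surface once the uploaded files are loaded with Hugging Face `transformers`; the local path `./tokenizer` is a placeholder for wherever this repo is cloned or downloaded, not part of the commit:

```python
from transformers import AutoTokenizer

# Load the tokenizer from a local copy of the uploaded files
# ("./tokenizer" is a placeholder path).
tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

# The tokens from special_tokens_map.json are exposed as attributes,
# with ids looked up in vocab.txt.
print(tokenizer.cls_token, tokenizer.sep_token)  # [CLS] [SEP]
print(tokenizer.pad_token_id)

# [CLS] and [SEP] are inserted automatically around a single sequence.
enc = tokenizer("A tokenized sentence.")
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))
```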
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,86 @@
+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "id2label": {
+    "0": "O",
+    "1": "B-Title",
+    "10": "B-Algorithm",
+    "11": "B-Figure",
+    "12": "B-Table",
+    "13": "B-Caption",
+    "14": "B-Header",
+    "15": "B-Footer",
+    "16": "B-Footnote",
+    "17": "I-Title",
+    "18": "I-Author",
+    "19": "I-Abstract",
+    "2": "B-Author",
+    "20": "I-Keywords",
+    "21": "I-Section",
+    "22": "I-Paragraph",
+    "23": "I-List",
+    "24": "I-Bibliography",
+    "25": "I-Equation",
+    "26": "I-Algorithm",
+    "27": "I-Figure",
+    "28": "I-Table",
+    "29": "I-Caption",
+    "3": "B-Abstract",
+    "30": "I-Header",
+    "31": "I-Footer",
+    "32": "I-Footnote",
+    "4": "B-Keywords",
+    "5": "B-Section",
+    "6": "B-Paragraph",
+    "7": "B-List",
+    "8": "B-Bibliography",
+    "9": "B-Equation"
+  },
+  "label2id": {
+    "B-Abstract": 3,
+    "B-Algorithm": 10,
+    "B-Author": 2,
+    "B-Bibliography": 8,
+    "B-Caption": 13,
+    "B-Equation": 9,
+    "B-Figure": 11,
+    "B-Footer": 15,
+    "B-Footnote": 16,
+    "B-Header": 14,
+    "B-Keywords": 4,
+    "B-List": 7,
+    "B-Paragraph": 6,
+    "B-Section": 5,
+    "B-Table": 12,
+    "B-Title": 1,
+    "I-Abstract": 19,
+    "I-Algorithm": 26,
+    "I-Author": 18,
+    "I-Bibliography": 24,
+    "I-Caption": 29,
+    "I-Equation": 25,
+    "I-Figure": 27,
+    "I-Footer": 31,
+    "I-Footnote": 32,
+    "I-Header": 30,
+    "I-Keywords": 20,
+    "I-List": 23,
+    "I-Paragraph": 22,
+    "I-Section": 21,
+    "I-Table": 28,
+    "I-Title": 17,
+    "O": 0
+  },
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "num_labels": 33,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
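
The `id2label`/`label2id` tables define a BIO tagging scheme over document-layout categories: `B-` opens a span, `I-` continues it, and `O` marks tokens outside any span. A minimal sketch of decoding per-token predictions with this map; the predicted ids below are made up for illustration, and in practice they would come from a token-classification model's argmax output:

```python
import json

# Read the label map from the uploaded config; JSON object keys are strings,
# so integer predictions must be converted with str() before lookup.
with open("tokenizer_config.json") as f:
    config = json.load(f)

id2label = config["id2label"]

predicted_ids = [1, 17, 17, 0, 5, 21]  # hypothetical per-token predictions
labels = [id2label[str(i)] for i in predicted_ids]
print(labels)  # ['B-Title', 'I-Title', 'I-Title', 'O', 'B-Section', 'I-Section']
```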
vocab.txt ADDED
The diff for this file is too large to render. See raw diff