ykacer committed on
Commit
f532b28
1 Parent(s): 8bb933f

add tokenizer config

Browse files
Files changed (4) hide show
  1. README.md +14 -0
  2. special_tokens_map.json +1 -0
  3. tokenizer_config.json +1 -0
  4. vocab.txt +0 -0
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ language:
4
+ - en
5
+ thumbnail: https://raw.githubusercontent.com/JetRunner/BERT-of-Theseus/master/bert-of-theseus.png
6
+ tags:
7
+ - sequence
8
+ - classification
9
+ license: apache-2.0
10
+ datasets:
11
+ - imdb
12
+ metrics:
13
+ - accuracy
14
+ ---
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-base-cased"}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff