aseifert commited on
Commit
5b15302
1 Parent(s): 4585027

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
1
+ {"[MASK]": 250101}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13c8d666d62a7bc4ac8f040aab68e942c861f93303156cc28f5c7e885d86d6e3
3
+ size 4305025
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "vocab_type": "spm", "special_tokens_map_file": null, "name_or_path": "outputs/microsoft/mdeberta-v3-base_ws_0_lr2e-05_bs128_wd0/checkpoint-1470-epoch-5/", "tokenizer_class": "DebertaV2Tokenizer"}