malper committed on
Commit
0ff4ccc
1 Parent(s): 5cfb568

add tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "\ue000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "\ue001", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "\ue001", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "\u0000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "\ue000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "\ue003", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "\ue000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "\ue001", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "\ue001", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "\ue000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "\u0000", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "\ue003", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": "/home/morrisalper/.cache/huggingface/transformers/f8be97736b4765e3e8d559b4e7d4f1f531b1621150e5344b600684a27bc84e38.ab71f530366fe02e2834427e7b90198bfd0d573bc4279bfafdb2b95fe2b46dde", "tokenizer_file": null, "name_or_path": "google/canine-c", "tokenizer_class": "CanineTokenizer"}