addy88 commited on
Commit
0faed38
1 Parent(s): 0fb0470

add tokenizer

Browse files
Files changed (4) hide show
  1. merges.txt +0 -0
  2. special_tokens_map.json +1 -0
  3. tokenizer_config.json +1 -0
  4. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"errors": "replace", "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": "/root/.cache/huggingface/transformers/f420b33feb234cfc688fa17febbada1c21692d80c819c5fa8147ccdca57071ac.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "tokenizer_file": null, "name_or_path": "Salesforce/codet5-small", "tokenizer_class": "RobertaTokenizer"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff